sportdb 1.6.12 → 1.6.13
Sign up to get free protection for your applications and to get access to all the features.
- data/Manifest.txt +1 -0
- data/lib/sportdb.rb +1 -0
- data/lib/sportdb/models/event.rb +1 -1
- data/lib/sportdb/models/league.rb +2 -0
- data/lib/sportdb/models/person.rb +8 -0
- data/lib/sportdb/models/track.rb +3 -1
- data/lib/sportdb/reader.rb +173 -32
- data/lib/sportdb/schema.rb +3 -1
- data/lib/sportdb/title.rb +146 -0
- data/lib/sportdb/utils.rb +157 -131
- data/lib/sportdb/version.rb +1 -1
- metadata +10 -9
data/Manifest.txt
CHANGED
data/lib/sportdb.rb
CHANGED
data/lib/sportdb/models/event.rb
CHANGED
@@ -28,6 +28,14 @@ class Person < ActiveRecord::Base
|
|
28
28
|
if value =~ /^[a-z]{2}$/ ## assume two-letter country key e.g. at,de,mx,etc.
|
29
29
|
value_country = Country.find_by_key!( value )
|
30
30
|
new_attributes[ :country_id ] = value_country.id
|
31
|
+
elsif value =~ /^[A-Z]{3}$/ ## assume three-letter code e.g. AUS, MAL, etc.
|
32
|
+
new_attributes[ :code ] = value
|
33
|
+
elsif value =~ /^([0-9]{1,2})\s(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s([0-9]{4})$/ ## assume birthday
|
34
|
+
value_date_str = '%02d/%s/%d' % [$1, $2, $3] ## move to matcher!!
|
35
|
+
value_date = Date.strptime( value_date_str, '%d/%b/%Y' ) ## %b - abbreviated month name (e.g. Jan,Feb, etc.)
|
36
|
+
logger.debug " birthday #{value_date_str} - #{value_date}"
|
37
|
+
new_attributes[ :born_at ] = value_date
|
38
|
+
## todo: convert to date
|
31
39
|
else
|
32
40
|
## todo: assume title2 ??
|
33
41
|
## assume title2 if title2 is empty (not already in use)
|
data/lib/sportdb/models/track.rb
CHANGED
@@ -13,7 +13,7 @@ class Track < ActiveRecord::Base
|
|
13
13
|
### fix: move known_tracks_table to event!!! (e.g. scoped by event)
|
14
14
|
|
15
15
|
def self.known_tracks_table
|
16
|
-
@@known_tracks_table ||=
|
16
|
+
@@known_tracks_table ||= TextUtils.build_title_table_for( Track.all )
|
17
17
|
end # method known_tracks_table
|
18
18
|
|
19
19
|
|
@@ -27,6 +27,8 @@ class Track < ActiveRecord::Base
|
|
27
27
|
if value =~ /^[a-z]{2}$/ ## assume two-letter country key e.g. at,de,mx,etc.
|
28
28
|
value_country = Country.find_by_key!( value )
|
29
29
|
new_attributes[ :country_id ] = value_country.id
|
30
|
+
elsif value =~ /^[A-Z]{3}$/ ## assume three-letter code e.g. AUS, MAL, etc.
|
31
|
+
new_attributes[ :code ] = value
|
30
32
|
else
|
31
33
|
## todo: assume title2 ??
|
32
34
|
## assume title2 if title2 is empty (not already in use)
|
data/lib/sportdb/reader.rb
CHANGED
@@ -94,7 +94,10 @@ class Reader
|
|
94
94
|
elsif name =~ /\/squads/ || name =~ /\/rosters/ # e.g. 2013/squads.txt in formula1.db
|
95
95
|
load_rosters( name )
|
96
96
|
elsif name =~ /\/([0-9]{2})-/
|
97
|
-
|
97
|
+
race_pos = $1.to_i
|
98
|
+
# NB: assume @event is set from previous load
|
99
|
+
race = Race.find_by_event_id_and_pos( @event.id, race_pos )
|
100
|
+
load_records( name, race_id: race.id ) # e.g. 2013/04-gp-monaco.txt in formula1.db
|
98
101
|
elsif name =~ /^seasons/
|
99
102
|
load_seasons( name )
|
100
103
|
elsif name =~ /^leagues/
|
@@ -187,7 +190,11 @@ class Reader
|
|
187
190
|
|
188
191
|
def load_seasons( name )
|
189
192
|
|
190
|
-
|
193
|
+
path = "#{include_path}/#{name}.yml"
|
194
|
+
|
195
|
+
logger.info "parsing data '#{name}' (#{path})..."
|
196
|
+
|
197
|
+
reader = HashReader.new( path )
|
191
198
|
|
192
199
|
####
|
193
200
|
## fix!!!!!
|
@@ -207,6 +214,7 @@ class Reader
|
|
207
214
|
value.each do |item|
|
208
215
|
season_attribs = {}
|
209
216
|
|
217
|
+
logger.debug " find season key: #{item.to_s.strip}"
|
210
218
|
season = Season.find_by_key( item.to_s.strip )
|
211
219
|
|
212
220
|
## check if it exists
|
@@ -231,9 +239,47 @@ class Reader
|
|
231
239
|
|
232
240
|
end # each key,value
|
233
241
|
|
242
|
+
Prop.create_from_fixture!( name, path )
|
243
|
+
|
234
244
|
end # load_seasons
|
235
245
|
|
236
246
|
|
247
|
+
def fetch_event( name )
|
248
|
+
# get/fetch/find event from yml file
|
249
|
+
|
250
|
+
path = "#{include_path}/#{name}.yml"
|
251
|
+
|
252
|
+
logger.info "parsing data '#{name}' (#{path})..."
|
253
|
+
|
254
|
+
|
255
|
+
## todo/fix: use h = HashFile.load( path ) or similar instead of HashReader!!
|
256
|
+
|
257
|
+
reader = HashReader.new( path )
|
258
|
+
|
259
|
+
event_attribs = {}
|
260
|
+
|
261
|
+
reader.each_typed do |key, value|
|
262
|
+
|
263
|
+
## puts "processing event attrib >>#{key}<< >>#{value}<<..."
|
264
|
+
|
265
|
+
if key == 'league'
|
266
|
+
league = League.find_by_key!( value.to_s.strip )
|
267
|
+
event_attribs[ 'league_id' ] = league.id
|
268
|
+
elsif key == 'season'
|
269
|
+
season = Season.find_by_key!( value.to_s.strip )
|
270
|
+
event_attribs[ 'season_id' ] = season.id
|
271
|
+
else
|
272
|
+
# skip; do nothing
|
273
|
+
end
|
274
|
+
end # each key,value
|
275
|
+
|
276
|
+
league_id = event_attribs['league_id']
|
277
|
+
season_id = event_attribs['season_id']
|
278
|
+
|
279
|
+
event = Event.find_by_league_id_and_season_id!( league_id, season_id )
|
280
|
+
event
|
281
|
+
end
|
282
|
+
|
237
283
|
|
238
284
|
def load_event( name )
|
239
285
|
|
@@ -243,7 +289,11 @@ class Reader
|
|
243
289
|
## use Event.create_or_update_from_hash_reader?? or similar
|
244
290
|
# move parsing code to model
|
245
291
|
|
246
|
-
|
292
|
+
path = "#{include_path}/#{name}.yml"
|
293
|
+
|
294
|
+
logger.info "parsing data '#{name}' (#{path})..."
|
295
|
+
|
296
|
+
reader = HashReader.new( path )
|
247
297
|
|
248
298
|
event_attribs = {}
|
249
299
|
|
@@ -306,7 +356,12 @@ class Reader
|
|
306
356
|
|
307
357
|
end # each key,value
|
308
358
|
|
309
|
-
|
359
|
+
league_id = event_attribs['league_id']
|
360
|
+
season_id = event_attribs['season_id']
|
361
|
+
|
362
|
+
logger.debug "find event - league_id: #{league_id}, season_id: #{season_id}"
|
363
|
+
|
364
|
+
event = Event.find_by_league_id_and_season_id( league_id, season_id )
|
310
365
|
|
311
366
|
## check if it exists
|
312
367
|
if event.present?
|
@@ -320,6 +375,8 @@ class Reader
|
|
320
375
|
|
321
376
|
event.update_attributes!( event_attribs )
|
322
377
|
|
378
|
+
Prop.create_from_fixture!( name, path )
|
379
|
+
|
323
380
|
end # load_event
|
324
381
|
|
325
382
|
|
@@ -351,7 +408,7 @@ class Reader
|
|
351
408
|
|
352
409
|
|
353
410
|
|
354
|
-
def load_records( name )
|
411
|
+
def load_records( name, more_attribs={} )
|
355
412
|
path = "#{include_path}/#{name}.txt"
|
356
413
|
|
357
414
|
logger.info "parsing data '#{name}' (#{path})..."
|
@@ -364,30 +421,66 @@ class Reader
|
|
364
421
|
# @known_tracks = Track.known_tracks_table
|
365
422
|
|
366
423
|
## fix: add @known_teams - for now; use teams (not scoped by event)
|
424
|
+
@known_teams = TextUtils.build_title_table_for( Team.all )
|
425
|
+
## and for now use all persons
|
426
|
+
@known_persons = TextUtils.build_title_table_for( Person.all )
|
367
427
|
|
368
|
-
load_records_worker( reader )
|
428
|
+
load_records_worker( reader, more_attribs )
|
369
429
|
|
370
430
|
Prop.create_from_fixture!( name, path )
|
371
431
|
end
|
372
432
|
|
373
|
-
def load_records_worker( reader )
|
433
|
+
def load_records_worker( reader, more_attribs )
|
374
434
|
|
375
435
|
reader.each_line do |line|
|
376
436
|
logger.debug " line: >#{line}<"
|
377
437
|
|
378
|
-
|
379
|
-
# pos = find_game_pos!( line ) # alias -> rename to find_pos! or better use find_leading_pos!( line )
|
438
|
+
cut_off_end_of_line_comment!( line )
|
380
439
|
|
381
|
-
|
440
|
+
state = find_record_leading_state!( line )
|
382
441
|
|
383
|
-
|
442
|
+
map_team!( line )
|
443
|
+
team_key = find_team!( line )
|
444
|
+
team = Team.find_by_key!( team_key )
|
445
|
+
|
446
|
+
map_person!( line )
|
447
|
+
person_key = find_person!( line )
|
448
|
+
person = Person.find_by_key!( person_key )
|
449
|
+
|
450
|
+
timeline = find_record_timeline!( line )
|
451
|
+
|
452
|
+
laps = find_record_laps!( line )
|
453
|
+
|
454
|
+
comment = find_record_comment!( line )
|
455
|
+
|
456
|
+
logger.debug " line2: >#{line}<"
|
384
457
|
|
385
458
|
record_attribs = {
|
386
|
-
|
387
|
-
|
459
|
+
state: state,
|
460
|
+
## team_id: team.id, ## NB: not needed for db
|
461
|
+
person_id: person.id,
|
462
|
+
timeline: timeline,
|
463
|
+
comment: comment,
|
464
|
+
laps: laps
|
388
465
|
}
|
389
466
|
|
390
|
-
|
467
|
+
record_attribs = record_attribs.merge( more_attribs )
|
468
|
+
|
469
|
+
### check if record exists
|
470
|
+
record = Record.find_by_race_id_and_person_id( record_attribs[ :race_id ],
|
471
|
+
record_attribs[ :person_id ])
|
472
|
+
|
473
|
+
if record.present?
|
474
|
+
logger.debug "update Record #{record.id}:"
|
475
|
+
else
|
476
|
+
logger.debug "create Record:"
|
477
|
+
record = Record.new
|
478
|
+
end
|
479
|
+
|
480
|
+
logger.debug record_attribs.to_json
|
481
|
+
|
482
|
+
record.update_attributes!( record_attribs )
|
483
|
+
|
391
484
|
end # lines.each
|
392
485
|
|
393
486
|
end # method load_record_worker
|
@@ -407,11 +500,15 @@ class Reader
|
|
407
500
|
# @known_tracks = Track.known_tracks_table
|
408
501
|
|
409
502
|
## fix: add @known_teams - for now; use teams (not scoped by event)
|
503
|
+
## for now use all teams
|
504
|
+
@known_teams = TextUtils.build_title_table_for( Team.all )
|
505
|
+
## and for now use all persons
|
506
|
+
@known_persons = TextUtils.build_title_table_for( Person.all )
|
507
|
+
|
410
508
|
|
411
509
|
load_rosters_worker( reader )
|
412
510
|
|
413
|
-
Prop.create_from_fixture!( name, path )
|
414
|
-
|
511
|
+
Prop.create_from_fixture!( name, path )
|
415
512
|
end
|
416
513
|
|
417
514
|
def load_rosters_worker( reader )
|
@@ -419,19 +516,40 @@ class Reader
|
|
419
516
|
reader.each_line do |line|
|
420
517
|
logger.debug " line: >#{line}<"
|
421
518
|
|
422
|
-
|
423
|
-
|
519
|
+
cut_off_end_of_line_comment!( line )
|
520
|
+
|
521
|
+
pos = find_leading_pos!( line )
|
522
|
+
|
523
|
+
map_team!( line )
|
524
|
+
team_key = find_team!( line )
|
525
|
+
team = Team.find_by_key!( team_key )
|
526
|
+
|
527
|
+
map_person!( line )
|
528
|
+
person_key = find_person!( line )
|
529
|
+
person = Person.find_by_key!( person_key )
|
424
530
|
|
425
|
-
|
531
|
+
logger.debug " line2: >#{line}<"
|
426
532
|
|
427
|
-
|
533
|
+
### check if roster record exists
|
534
|
+
roster = Roster.find_by_event_id_and_team_id_and_person_id( @event.id, team.id, person.id )
|
535
|
+
|
536
|
+
if roster.present?
|
537
|
+
logger.debug "update Roster #{roster.id}:"
|
538
|
+
else
|
539
|
+
logger.debug "create Roster:"
|
540
|
+
roster = Roster.new
|
541
|
+
end
|
428
542
|
|
429
543
|
roster_attribs = {
|
430
|
-
pos:
|
431
|
-
|
544
|
+
pos: pos,
|
545
|
+
team_id: team.id,
|
546
|
+
person_id: person.id,
|
547
|
+
event_id: @event.id # NB: reuse/fallthrough from races - make sure load_races goes first (to setup event)
|
432
548
|
}
|
433
549
|
|
434
|
-
|
550
|
+
logger.debug roster_attribs.to_json
|
551
|
+
|
552
|
+
roster.update_attributes!( roster_attribs )
|
435
553
|
end # lines.each
|
436
554
|
|
437
555
|
end # method load_rosters_worker
|
@@ -439,6 +557,11 @@ class Reader
|
|
439
557
|
|
440
558
|
|
441
559
|
def load_races( name )
|
560
|
+
load_event( name ) # must have .yml file with same name for event definition
|
561
|
+
@event = fetch_event( name )
|
562
|
+
|
563
|
+
logger.info " event: #{@event.key} >>#{@event.full_title}<<"
|
564
|
+
|
442
565
|
path = "#{include_path}/#{name}.txt"
|
443
566
|
|
444
567
|
logger.info "parsing data '#{name}' (#{path})..."
|
@@ -453,28 +576,46 @@ class Reader
|
|
453
576
|
load_races_worker( reader )
|
454
577
|
|
455
578
|
Prop.create_from_fixture!( name, path )
|
456
|
-
|
457
579
|
end
|
458
580
|
|
581
|
+
|
459
582
|
def load_races_worker( reader )
|
460
583
|
|
461
584
|
reader.each_line do |line|
|
462
585
|
logger.debug " line: >#{line}<"
|
463
586
|
|
464
|
-
|
465
|
-
|
587
|
+
cut_off_end_of_line_comment!( line )
|
588
|
+
|
589
|
+
pos = find_leading_pos!( line )
|
466
590
|
|
467
|
-
|
591
|
+
map_track!( line )
|
468
592
|
track_key = find_track!( line )
|
593
|
+
track = Track.find_by_key!( track_key )
|
594
|
+
|
469
595
|
date = find_date!( line )
|
470
|
-
|
596
|
+
|
597
|
+
logger.debug " line2: >#{line}<"
|
598
|
+
|
599
|
+
### check if race exists
|
600
|
+
race = Race.find_by_event_id_and_track_id( @event.id, track.id )
|
601
|
+
|
602
|
+
if race.present?
|
603
|
+
logger.debug "update race #{race.id}:"
|
604
|
+
else
|
605
|
+
logger.debug "create race:"
|
606
|
+
race = Race.new
|
607
|
+
end
|
608
|
+
|
471
609
|
race_attribs = {
|
472
|
-
pos:
|
473
|
-
|
474
|
-
start_at: date
|
610
|
+
pos: pos,
|
611
|
+
track_id: track.id,
|
612
|
+
start_at: date,
|
613
|
+
event_id: @event.id
|
475
614
|
}
|
476
615
|
|
477
|
-
|
616
|
+
logger.debug race_attribs.to_json
|
617
|
+
|
618
|
+
race.update_attributes!( race_attribs )
|
478
619
|
end # lines.each
|
479
620
|
|
480
621
|
end # method load_races_worker
|
data/lib/sportdb/schema.rb
CHANGED
@@ -27,6 +27,7 @@ create_table :persons do |t| # use people ? instead of persons (person/person
|
|
27
27
|
t.string :key, :null => false # import/export key
|
28
28
|
t.string :name, :null => false
|
29
29
|
t.string :synonyms # comma separated list of synonyms
|
30
|
+
t.string :code # three letter code (short title)
|
30
31
|
|
31
32
|
## todo: add gender flag (male/female -man/lady how?)
|
32
33
|
t.date :born_at # optional date of birth (birthday)
|
@@ -61,7 +62,8 @@ end
|
|
61
62
|
create_table :tracks do |t| # e.g. Formula 1 circuits or Alpine Ski resorts/slopes/pistes
|
62
63
|
t.string :key, :null => false # import/export key
|
63
64
|
t.string :title, :null => false
|
64
|
-
t.string :synonyms
|
65
|
+
t.string :synonyms # comma separated list of synonyms
|
66
|
+
t.string :code # three letter code (short title)
|
65
67
|
|
66
68
|
t.references :city
|
67
69
|
t.references :region
|
@@ -0,0 +1,146 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
################
|
4
|
+
# todo: move module to textutils!!!
|
5
|
+
|
6
|
+
### fix: move to textutils??
|
7
|
+
|
8
|
+
|
9
|
+
## todo: rename to TitleHelpers? TitleMatcher? TitleMapper? TitleMapping? TitleMappings? TitleFinder? TitleHelpers?
|
10
|
+
# or rename to KeyMapping?, KeyMapper?, KeyTable? etc.
|
11
|
+
|
12
|
+
|
13
|
+
module TextUtils::TitleTable
|
14
|
+
|
15
|
+
|
16
|
+
def build_title_table_for( records )
|
17
|
+
## build known tracks table w/ synonyms e.g.
|
18
|
+
#
|
19
|
+
# [[ 'wolfsbrug', [ 'VfL Wolfsburg' ]],
|
20
|
+
# [ 'augsburg', [ 'FC Augsburg', 'Augi2', 'Augi3' ]],
|
21
|
+
# [ 'stuttgart', [ 'VfB Stuttgart' ]] ]
|
22
|
+
|
23
|
+
known_titles = []
|
24
|
+
|
25
|
+
records.each_with_index do |rec,index|
|
26
|
+
|
27
|
+
title_candidates = []
|
28
|
+
title_candidates << rec.title
|
29
|
+
|
30
|
+
title_candidates += rec.synonyms.split('|') if rec.synonyms.present?
|
31
|
+
|
32
|
+
|
33
|
+
## check if title includes subtitle e.g. Grand Prix Japan (Suzuka Circuit)
|
34
|
+
# make subtitle optional by adding title w/o subtitle e.g. Grand Prix Japan
|
35
|
+
|
36
|
+
titles = []
|
37
|
+
title_candidates.each do |t|
|
38
|
+
titles << t
|
39
|
+
if t =~ /\(.+\)/
|
40
|
+
extra_title = t.gsub( /\(.+\)/, '' ) # remove/delete subtitles
|
41
|
+
extra_title.strip! # strip leading and trailing whitespace too!
|
42
|
+
titles << extra_title
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
|
47
|
+
## NB: sort here by length (largest goes first - best match)
|
48
|
+
# exclude code and key (key should always go last)
|
49
|
+
titles = titles.sort { |left,right| right.length <=> left.length }
|
50
|
+
|
51
|
+
## escape for regex plus allow subs for special chars/accents
|
52
|
+
titles = titles.map { |title| TextUtils.title_esc_regex( title ) }
|
53
|
+
|
54
|
+
## NB: only include code field - if defined
|
55
|
+
titles << rec.code if rec.respond_to?(:code) && rec.code.present?
|
56
|
+
|
57
|
+
known_titles << [ rec.key, titles ]
|
58
|
+
|
59
|
+
### fix: use plain logger
|
60
|
+
LogUtils::Logger.root.debug " #{rec.class.name}[#{index+1}] #{rec.key} >#{titles.join('|')}<"
|
61
|
+
end
|
62
|
+
|
63
|
+
known_titles
|
64
|
+
end
|
65
|
+
|
66
|
+
|
67
|
+
|
68
|
+
def find_key_for!( name, line )
|
69
|
+
regex = /@@oo([^@]+?)oo@@/ # e.g. everything in @@ .... @@ (use non-greedy +? plus all chars but not @, that is [^@])
|
70
|
+
|
71
|
+
upcase_name = name.upcase
|
72
|
+
downcase_name = name.downcase
|
73
|
+
|
74
|
+
if line =~ regex
|
75
|
+
value = "#{$1}"
|
76
|
+
### fix: use plain logger
|
77
|
+
LogUtils::Logger.root.debug " #{downcase_name}: >#{value}<"
|
78
|
+
|
79
|
+
line.sub!( regex, "[#{upcase_name}]" )
|
80
|
+
|
81
|
+
return $1
|
82
|
+
else
|
83
|
+
return nil
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
|
88
|
+
def find_keys_for!( name, line ) # NB: keys (plural!) - will return array
|
89
|
+
counter = 1
|
90
|
+
keys = []
|
91
|
+
|
92
|
+
downcase_name = name.downcase
|
93
|
+
|
94
|
+
key = find_key_for!( "#{downcase_name}#{counter}", line )
|
95
|
+
while key.present?
|
96
|
+
keys << key
|
97
|
+
counter += 1
|
98
|
+
key = find_key_for!( "#{downcase_name}#{counter}", line )
|
99
|
+
end
|
100
|
+
|
101
|
+
keys
|
102
|
+
end
|
103
|
+
|
104
|
+
|
105
|
+
def map_titles_for!( name, line, title_table )
|
106
|
+
title_table.each do |rec|
|
107
|
+
key = rec[0]
|
108
|
+
values = rec[1]
|
109
|
+
map_title_worker_for!( name, line, key, values )
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
|
114
|
+
def map_title_worker_for!( name, line, key, values )
|
115
|
+
|
116
|
+
downcase_name = name.downcase
|
117
|
+
|
118
|
+
values.each do |value|
|
119
|
+
## nb: \b does NOT include space or newline for word boundary (only alphanums e.g. a-z0-9)
|
120
|
+
## (thus add it, allows match for Benfica Lis. for example - note . at the end)
|
121
|
+
|
122
|
+
## check add $ e.g. (\b| |\t|$) does this work? - check w/ Benfica Lis.$
|
123
|
+
regex = /\b#{value}(\b| |\t|$)/ # wrap with word boundary (e.g. match only whole words e.g. not wac in wacker)
|
124
|
+
if line =~ regex
|
125
|
+
### fix: use plain logger
|
126
|
+
LogUtils::Logger.root.debug " match for #{downcase_name} >#{key}< >#{value}<"
|
127
|
+
# make sure @@oo{key}oo@@ doesn't match itself with other key e.g. wacker, wac, etc.
|
128
|
+
line.sub!( regex, "@@oo#{key}oo@@ " ) # NB: add one space char at end
|
129
|
+
return true # break out after first match (do NOT continue)
|
130
|
+
end
|
131
|
+
end
|
132
|
+
return false
|
133
|
+
end
|
134
|
+
|
135
|
+
|
136
|
+
|
137
|
+
end # module TextUtils::TitleTable
|
138
|
+
|
139
|
+
|
140
|
+
|
141
|
+
## auto-include methods
|
142
|
+
|
143
|
+
module TextUtils
|
144
|
+
# make helpers available as class methods e.g. TextUtils.convert_unicode_dashes_to_plain_ascii
|
145
|
+
extend TitleTable # lets us use TextUtils.build_title_table_for etc.
|
146
|
+
end
|
data/lib/sportdb/utils.rb
CHANGED
@@ -1,48 +1,6 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
|
-
### some utils moved to worldbdb/utils for reuse
|
4
|
-
|
5
|
-
|
6
|
-
### fix: move to textutils??
|
7
|
-
|
8
|
-
|
9
|
-
def build_match_table_for( recs )
|
10
|
-
## build known tracks table w/ synonyms e.g.
|
11
|
-
#
|
12
|
-
# [[ 'wolfsbrug', [ 'VfL Wolfsburg' ]],
|
13
|
-
# [ 'augsburg', [ 'FC Augsburg', 'Augi2', 'Augi3' ]],
|
14
|
-
# [ 'stuttgart', [ 'VfB Stuttgart' ]] ]
|
15
|
-
|
16
|
-
known_titles = []
|
17
|
-
|
18
|
-
recs.each_with_index do |rec,index|
|
19
|
-
|
20
|
-
titles = []
|
21
|
-
titles << rec.title
|
22
|
-
titles += rec.synonyms.split('|') if rec.synonyms.present?
|
23
|
-
|
24
|
-
## NB: sort here by length (largest goes first - best match)
|
25
|
-
# exclude code and key (key should always go last)
|
26
|
-
titles = titles.sort { |left,right| right.length <=> left.length }
|
27
|
-
|
28
|
-
## escape for regex plus allow subs for special chars/accents
|
29
|
-
titles = titles.map { |title| TextUtils.title_esc_regex( title ) }
|
30
|
-
|
31
|
-
## NB: only include code field - if defined
|
32
|
-
titles << rec.code if rec.respond_to?(:code) && rec.code.present?
|
33
|
-
|
34
|
-
known_titles << [ rec.key, titles ]
|
35
|
-
|
36
|
-
### fix:
|
37
|
-
## plain logger
|
38
|
-
|
39
|
-
LogUtils::Logger.root.debug " #{rec.class.name}[#{index+1}] #{rec.key} >#{titles.join('|')}<"
|
40
|
-
end
|
41
|
-
|
42
|
-
known_titles
|
43
|
-
end
|
44
|
-
|
45
|
-
|
3
|
+
### note: some utils moved to worlddb/utils for reuse
|
46
4
|
|
47
5
|
|
48
6
|
module SportDb::FixtureHelpers
|
@@ -116,11 +74,16 @@ module SportDb::FixtureHelpers
|
|
116
74
|
return [title,pos]
|
117
75
|
end
|
118
76
|
|
77
|
+
|
119
78
|
def cut_off_end_of_line_comment!( line )
|
120
79
|
# cut off (that is, remove) optional end of line comment starting w/ #
|
121
80
|
|
122
|
-
line
|
123
|
-
|
81
|
+
line.sub!( /#.*$/ ) do |_|
|
82
|
+
logger.debug " cutting off end of line comment - >>#{$&}<<"
|
83
|
+
''
|
84
|
+
end
|
85
|
+
|
86
|
+
# NB: line = line.sub will NOT work - thus, lets use line.sub!
|
124
87
|
end
|
125
88
|
|
126
89
|
|
@@ -204,15 +167,21 @@ module SportDb::FixtureHelpers
|
|
204
167
|
# nb: allow 2.3.2012 e.g. no leading zero required
|
205
168
|
# nb: allow hour as 20.30 or 3.30 instead of 03.30
|
206
169
|
regex_de = /\b(\d{1,2})\.(\d{1,2})\.\s+(\d{1,2})[:.](\d{2})\b/
|
207
|
-
|
170
|
+
|
208
171
|
# e.g. 14.09.2012 20:30 => DD.MM.YYYY HH:MM
|
209
172
|
# nb: allow 2.3.2012 e.g. no leading zero required
|
210
173
|
# nb: allow hour as 20.30
|
211
174
|
regex_de2 = /\b(\d{1,2})\.(\d{1,2})\.(\d{4})\s+(\d{1,2})[:.](\d{2})\b/
|
212
175
|
|
213
176
|
|
177
|
+
month_abbrev_en = "Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec"
|
178
|
+
|
179
|
+
# e.g. 12 May 2013 14:00 => D|DD MMM YYYY H|HH:MM
|
180
|
+
regex_en = /\b(\d{1,2})\s(#{month_abbrev_en})\s(\d{4})\s+(\d{1,2}):(\d{2})\b/
|
181
|
+
|
182
|
+
|
214
183
|
if line =~ regex_db
|
215
|
-
value =
|
184
|
+
value = '%d-%02d-%02d %02d:%02d' % [$1, $2, $3, $4, $5]
|
216
185
|
logger.debug " date: >#{value}<"
|
217
186
|
|
218
187
|
## todo: lets you configure year
|
@@ -222,14 +191,14 @@ module SportDb::FixtureHelpers
|
|
222
191
|
|
223
192
|
return DateTime.strptime( value, '%Y-%m-%d %H:%M' )
|
224
193
|
elsif line =~ regex_db2
|
225
|
-
value =
|
194
|
+
value = '%d-%02d-%02d 12:00' % [$1, $2, $3]
|
226
195
|
logger.debug " date: >#{value}<"
|
227
|
-
|
196
|
+
|
228
197
|
line.sub!( regex_db2, '[DATE.DB2]' )
|
229
198
|
|
230
199
|
return DateTime.strptime( value, '%Y-%m-%d %H:%M' )
|
231
200
|
elsif line =~ regex_de2
|
232
|
-
value =
|
201
|
+
value = '%d-%02d-%02d %02d:%02d' % [$3, $2, $1, $4, $5]
|
233
202
|
logger.debug " date: >#{value}<"
|
234
203
|
|
235
204
|
## todo: lets you configure year
|
@@ -243,8 +212,8 @@ module SportDb::FixtureHelpers
|
|
243
212
|
#### fix/todo:
|
244
213
|
# get year from event start date!!!!
|
245
214
|
# do NOT hard code!!!!
|
246
|
-
|
247
|
-
value =
|
215
|
+
|
216
|
+
value = '2012-%02d-%02d %02d:%02d' % [$2, $1, $3, $4]
|
248
217
|
logger.debug " date: >#{value}<"
|
249
218
|
|
250
219
|
## todo: lets you configure year
|
@@ -253,13 +222,103 @@ module SportDb::FixtureHelpers
|
|
253
222
|
line.sub!( regex_de, '[DATE.DE]' )
|
254
223
|
|
255
224
|
return DateTime.strptime( value, '%Y-%m-%d %H:%M' )
|
225
|
+
elsif line =~ regex_en
|
226
|
+
value = '%d-%s-%02d %02d:%02d' % [$3, $2, $1, $4, $5]
|
227
|
+
logger.debug " date: >#{value}<"
|
228
|
+
|
229
|
+
line.sub!( regex_en, '[DATE.EN]' )
|
230
|
+
|
231
|
+
return DateTime.strptime( value, '%Y-%b-%d %H:%M' ) ## %b - abbreviated month name (e.g. Jan,Feb, etc.)
|
256
232
|
else
|
257
233
|
return nil
|
258
234
|
end
|
259
235
|
end
|
260
236
|
|
261
237
|
|
262
|
-
def
|
238
|
+
def find_record_comment!( line )
|
239
|
+
# assume everything left after the last record marker,that is, ] is a record comment
|
240
|
+
|
241
|
+
regex = /]([^\]]+?)$/ # NB: use non-greedy +?
|
242
|
+
|
243
|
+
if line =~ regex
|
244
|
+
value = $1.strip
|
245
|
+
return nil if value.blank? # skip whitespaces only
|
246
|
+
|
247
|
+
logger.debug " comment: >#{value}<"
|
248
|
+
|
249
|
+
line.sub!( value, '[REC.COMMENT] ' )
|
250
|
+
return value
|
251
|
+
else
|
252
|
+
return nil
|
253
|
+
end
|
254
|
+
end
|
255
|
+
|
256
|
+
|
257
|
+
def find_record_timeline!( line )
|
258
|
+
|
259
|
+
# +1 lap or +n laps
|
260
|
+
regex_laps = /\s+\+\d{1,2}\s(lap|laps)\b/
|
261
|
+
|
262
|
+
# 2:17:15.123
|
263
|
+
regex_time = /\b\d{1,2}:\d{2}:\d{2}\.\d{1,3}\b/
|
264
|
+
|
265
|
+
# +40.1 secs
|
266
|
+
regex_secs = /\s+\+\d{1,3}\.\d{1,3}\s(secs)\b/ # NB: before \+ - boundary (\b) will not work
|
267
|
+
|
268
|
+
# NB: $& contains the complete matched text
|
269
|
+
|
270
|
+
if line =~ regex_laps
|
271
|
+
value = $&.strip
|
272
|
+
logger.debug " timeline.laps: >#{value}<"
|
273
|
+
|
274
|
+
line.sub!( value, '[REC.TIMELINE.LAPS] ' ) # NB: add trailing space
|
275
|
+
return value
|
276
|
+
elsif line =~ regex_time
|
277
|
+
value = $&.strip
|
278
|
+
logger.debug " timeline.time: >#{value}<"
|
279
|
+
|
280
|
+
line.sub!( value, '[REC.TIMELINE.TIME] ' ) # NB: add trailing space
|
281
|
+
return value
|
282
|
+
elsif line =~ regex_secs
|
283
|
+
value = $&.strip
|
284
|
+
logger.debug " timeline.secs: >#{value}<"
|
285
|
+
|
286
|
+
line.sub!( value, '[REC.TIMELINE.SECS] ' ) # NB: add trailing space
|
287
|
+
return value
|
288
|
+
else
|
289
|
+
return nil
|
290
|
+
end
|
291
|
+
end
|
292
|
+
|
293
|
+
def find_record_laps!( line )
|
294
|
+
# e.g. first free-standing number w/ one or two digits e.g. 7 or 28 etc.
|
295
|
+
regex = /\b(\d{1,2})\b/
|
296
|
+
if line =~ regex
|
297
|
+
logger.debug " laps: >#{$1}<"
|
298
|
+
|
299
|
+
line.sub!( regex, '[REC.LAPS] ' ) # NB: add trailing space
|
300
|
+
return $1.to_i
|
301
|
+
else
|
302
|
+
return nil
|
303
|
+
end
|
304
|
+
end
|
305
|
+
|
306
|
+
def find_record_leading_state!( line )
|
307
|
+
# e.g. 1|2|3|etc or Ret - must start line
|
308
|
+
regex = /^[ \t]*(\d{1,3}|Ret)[ \t]+/
|
309
|
+
if line =~ regex
|
310
|
+
value = $1.dup
|
311
|
+
logger.debug " state: >#{value}<"
|
312
|
+
|
313
|
+
line.sub!( regex, '[REC.STATE] ' ) # NB: add trailing space
|
314
|
+
return value
|
315
|
+
else
|
316
|
+
return nil
|
317
|
+
end
|
318
|
+
end
|
319
|
+
|
320
|
+
|
321
|
+
def find_leading_pos!( line )
|
263
322
|
# extract optional game pos from line
|
264
323
|
# and return it
|
265
324
|
# NB: side effect - removes pos from line string
|
@@ -268,13 +327,17 @@ module SportDb::FixtureHelpers
|
|
268
327
|
regex = /^[ \t]*\((\d{1,3})\)[ \t]+/
|
269
328
|
if line =~ regex
|
270
329
|
logger.debug " pos: >#{$1}<"
|
271
|
-
|
272
|
-
line.sub!( regex, '[POS] ' )
|
330
|
+
|
331
|
+
line.sub!( regex, '[POS] ' ) # NB: add trailing space
|
273
332
|
return $1.to_i
|
274
333
|
else
|
275
334
|
return nil
|
276
335
|
end
|
336
|
+
end
|
277
337
|
|
338
|
+
def find_game_pos!( line )
|
339
|
+
## fix: add deprecation warning - remove - use find_leading_pos!
|
340
|
+
find_leading_pos!( line )
|
278
341
|
end
|
279
342
|
|
280
343
|
def find_scores!( line )
|
@@ -330,99 +393,62 @@ module SportDb::FixtureHelpers
|
|
330
393
|
end # method find_scores!
|
331
394
|
|
332
395
|
|
333
|
-
## todo/fix:
|
334
|
-
# find a better name find_xxx_by_title ?? find_xxx_w_match_table? or similiar
|
335
|
-
# move to its own file/module for easier maintance
|
336
|
-
# include build_match_table_for
|
337
|
-
# - lets us change internals e.g. lets improve matcher using a reverse index, for example
|
338
|
-
|
339
|
-
def find_xxx_worker!( name, line )
|
340
|
-
regex = /@@oo([^@]+?)oo@@/ # e.g. everything in @@ .... @@ (use non-greedy +? plus all chars but not @, that is [^@])
|
341
|
-
|
342
|
-
upcase_name = name.upcase
|
343
|
-
downcase_name = name.downcase
|
344
|
-
|
345
|
-
if line =~ regex
|
346
|
-
value = "#{$1}"
|
347
|
-
logger.debug " #{downcase_name}: >#{value}<"
|
348
|
-
|
349
|
-
line.sub!( regex, "[#{upcase_name}]" )
|
350
|
-
|
351
|
-
return $1
|
352
|
-
else
|
353
|
-
return nil
|
354
|
-
end
|
355
|
-
end
|
356
|
-
|
357
|
-
|
358
|
-
def match_xxx_worker!( name, line, key, values )
|
359
|
-
|
360
|
-
downcase_name = name.downcase
|
361
396
|
|
362
|
-
|
363
|
-
|
364
|
-
## (thus add it, allows match for Benfica Lis. for example - note . at the end)
|
365
|
-
|
366
|
-
## check add $ e.g. (\b| |\t|$) does this work? - check w/ Benfica Lis.$
|
367
|
-
regex = /\b#{value}(\b| |\t|$)/ # wrap with world boundry (e.g. match only whole words e.g. not wac in wacker)
|
368
|
-
if line =~ regex
|
369
|
-
logger.debug " match for #{downcase_name} >#{key}< >#{value}<"
|
370
|
-
# make sure @@oo{key}oo@@ doesn't match itself with other key e.g. wacker, wac, etc.
|
371
|
-
line.sub!( regex, "@@oo#{key}oo@@ " ) # NB: add one space char at end
|
372
|
-
return true # break out after first match (do NOT continue)
|
373
|
-
end
|
374
|
-
end
|
375
|
-
return false
|
397
|
+
def find_teams!( line ) # NB: returns an array - note: plural! (teamsss)
|
398
|
+
TextUtils.find_keys_for!( 'team', line )
|
376
399
|
end
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
def find_teams!( line )
|
381
|
-
counter = 1
|
382
|
-
teams = []
|
383
|
-
|
384
|
-
team = find_xxx_worker!( "team#{counter}", line )
|
385
|
-
while team.present?
|
386
|
-
teams << team
|
387
|
-
counter += 1
|
388
|
-
team = find_xxx_worker!( "team#{counter}", line )
|
389
|
-
end
|
390
|
-
|
391
|
-
teams
|
400
|
+
|
401
|
+
def find_team!( line ) # NB: returns key (string or nil)
|
402
|
+
TextUtils.find_key_for!( 'team', line )
|
392
403
|
end
|
393
404
|
|
394
405
|
## todo: check if find_team1 gets used? if not remove it!! use find_teams!
|
395
406
|
def find_team1!( line )
|
396
|
-
|
407
|
+
TextUtils.find_key_for!( 'team1', line )
|
397
408
|
end
|
398
|
-
|
409
|
+
|
399
410
|
def find_team2!( line )
|
400
|
-
|
411
|
+
TextUtils.find_key_for!( 'team2', line )
|
401
412
|
end
|
402
413
|
|
403
|
-
|
404
414
|
## todo/fix: pass in known_teams as a parameter? why? why not?
|
405
|
-
def match_teams!( line )
|
406
|
-
@known_teams.each do |rec|
|
407
|
-
key = rec[0]
|
408
|
-
values = rec[1]
|
409
|
-
match_xxx_worker!( 'team', line, key, values )
|
410
|
-
end # each known_teams
|
411
|
-
end # method match_teams!
|
412
|
-
|
413
415
|
|
416
|
+
def map_teams!( line )
|
417
|
+
TextUtils.map_titles_for!( 'team', line, @known_teams )
|
418
|
+
end
|
419
|
+
|
420
|
+
def map_team!( line ) # alias map_teams!
|
421
|
+
map_teams!( line )
|
422
|
+
end
|
414
423
|
|
415
424
|
def find_track!( line )
|
416
|
-
|
425
|
+
TextUtils.find_key_for!( 'track', line )
|
417
426
|
end
|
418
427
|
|
419
428
|
## todo/fix: pass in known_tracks as a parameter? why? why not?
|
420
|
-
def
|
421
|
-
@known_tracks
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
429
|
+
def map_track!( line )
|
430
|
+
TextUtils.map_titles_for!( 'track', line, @known_tracks )
|
431
|
+
end
|
432
|
+
|
433
|
+
def find_person!( line )
|
434
|
+
TextUtils.find_key_for!( 'person', line )
|
435
|
+
end
|
436
|
+
|
437
|
+
def map_person!( line )
|
438
|
+
TextUtils.map_titles_for!( 'person', line, @known_persons)
|
439
|
+
end
|
440
|
+
|
441
|
+
|
442
|
+
|
443
|
+
## deprecated methods - use map_
|
444
|
+
def match_teams!( line ) ## fix: rename to map_teams!! - remove match_teams!
|
445
|
+
## todo: issue deprecation warning
|
446
|
+
map_teams!( line )
|
447
|
+
end # method match_teams!
|
448
|
+
|
449
|
+
def match_track!( line ) ## fix: rename to map_track!!!
|
450
|
+
## todo: issue deprecation warning
|
451
|
+
map_track!( line )
|
426
452
|
end # method match_tracks!
|
427
453
|
|
428
454
|
|
data/lib/sportdb/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sportdb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.6.
|
4
|
+
version: 1.6.13
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -13,7 +13,7 @@ date: 2013-06-09 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: worlddb
|
16
|
-
requirement: &
|
16
|
+
requirement: &72775370 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '1.7'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *72775370
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: commander
|
27
|
-
requirement: &
|
27
|
+
requirement: &72775150 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: 4.1.3
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *72775150
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: rdoc
|
38
|
-
requirement: &
|
38
|
+
requirement: &72774930 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: '3.10'
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *72774930
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: hoe
|
49
|
-
requirement: &
|
49
|
+
requirement: &72774710 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ~>
|
@@ -54,7 +54,7 @@ dependencies:
|
|
54
54
|
version: '3.3'
|
55
55
|
type: :development
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *72774710
|
58
58
|
description: sportdb - sport.db command line tool
|
59
59
|
email: opensport@googlegroups.com
|
60
60
|
executables:
|
@@ -107,6 +107,7 @@ files:
|
|
107
107
|
- lib/sportdb/reader.rb
|
108
108
|
- lib/sportdb/schema.rb
|
109
109
|
- lib/sportdb/stats.rb
|
110
|
+
- lib/sportdb/title.rb
|
110
111
|
- lib/sportdb/utils.rb
|
111
112
|
- lib/sportdb/version.rb
|
112
113
|
- tasks/test.rb
|