icu_tournament 1.3.6 → 1.3.7

Sign up to get free protection for your applications and to get access to all the features.
@@ -109,6 +109,67 @@ module ICU
109
109
  # [122 Time control] Get or set with _time_control_. Free text.
110
110
  # [132 Round dates] Get an array of dates using _round_dates_ or one specific round date by calling _round_date_ with a round number.
111
111
  #
112
+ # == Parser Strictness
113
+ #
114
+ # In practice, Krause formatted files encountered in the wild can be produced in a variety of different ways and not always according to
115
+ # FIDE's standard, which itself is rather loose. This Ruby gem deals with that situation by not raising parsing errors when data is encountered
116
+ # where it is clear what is meant, even if it doesn't conform to the standards, such as they are. However, on output (serialisation) a strict
117
+ # interpretation of FIDE's standard is adhered to.
118
+ #
119
+ # For example in input data if a player's gender is given as "F" it's clear this means female, even though the specification calls for a lower
120
+ # case "w" (for woman) in this case. Similarly for titles: for example, both "GM" and FIDE's "g" are recognised as meaning Grand Master.
121
+ #
122
+ # When it comes to dates, the specification recommends the YYYY/MM/DD format for birth dates and YY/MM/DD for round dates but quotes an example where
123
+ # the start and finish dates are in the opposite order (DD.MM.YYYY) with a different separator. In practice, the author has encountered Krause files
124
+ # with US style date formatting (MM-DD-YYYY) and other bizarre formats (YY.DD.MM) which suffer from ambiguity when the day is 12 or less.
125
+ # It's not the separator ("/", "-", ".") that causes a problem but the year, month and day order. The solution adopted here is for all serialized
126
+ # dates to be in YYYY-MM-DD format (or YY-MM-DD for round dates which must fit in 8 characters), which is a recognised international standard
127
+ # (ISO 8601). However, for parsing, a much wider variation is permitted and there is some ability to detect and correct ambiguous dates. For example
128
+ # the following dates would all be interpreted as 2011-03-30:
129
+ #
130
+ # * 30th March 2011
131
+ # * 30.03.2011
132
+ # * 03/30/2011
133
+ #
134
+ # Where no additional information is available to resolve an ambiguity, the month is assumed to come in the middle, so 04/03/2011 is interpreted
135
+ # as 2011.03.04 and not 2011.04.03.
136
+ #
137
+ # Some Krause files that the author has encountered in the wild have 3-letter player federation codes that are not federations at all but something
138
+ # completely different (for example the first 3 letters of the player's club). This is a clear violation of the specification and raises a parsing
139
+ # exception. However in practice it's often necessary to deal with such files so the parser has two options to help in these cases. If the _fed_ option
140
+ # is set to "ignore" then all player federation codes will be ignored, even if valid. When it is set to "skip", invalid codes will be ignored but
141
+ # valid ones retained.
142
+ #
143
+ # tournament = parser.parse_file('tournament.tab', :fed => "ignore")
144
+ # tournament = parser.parse_file('tournament.tab', :fed => "skip")
145
+ #
146
+ # Similar options are available for parsing SwissPerfect files (see ICU::Tournament::SwissPerfect) which can suffer from the same problem.
147
+ #
148
+ # == Automatic Total Correction
149
+ #
150
+ # Another problem encountered with Krause files in practice is a mismatch between the declared total points for a player and the sum of their points
151
+ # from each round. Normally this just raises a parsing exception. However, there is one set of circumstances when such mismatches can be repaired:
152
+ #
153
+ # * the declared total score is higher than the sum of scores,
154
+ # * the player has at least one bye which isn't a full point bye or at least one round where no result is recorded,
155
+ # * the number of byes or missing results is enough to account for the difference in total score.
156
+ #
157
+ # If all these conditions are met then just enough bye scores are incremented, or new byes created, to make the sum match the total, and the
158
+ # data will parse without raising an exception.
159
+ #
160
+ # 012 Mismatched Totals
161
+ # 042 2011.03.04
162
+ # 001 1 Mouse,Minerva 1.0 2 2 b 0 0000 - =
163
+ # 001 2 Mouse,Mickey 1.5 1 1 w 1
164
+ #
165
+ # In this example both declared totals are higher than the sums of the round scores. However, player 1 has a half-point bye which can be upgraded to a full-point and player 2
166
+ # has no result in round 2 which leaves room for the creation of a new half-point bye. So this data parses without error and serializes to:
167
+ #
168
+ # 012 Mismatched Totals
169
+ # 042 2011-03-04
170
+ # 001 1 Mouse,Minerva 1.0 2 2 b 0 0000 - +
171
+ # 001 2 Mouse,Mickey 1.5 1 1 w 1 0000 - =
172
+ #
112
173
  class Krause
113
174
  attr_reader :error, :comments
114
175
 
@@ -120,18 +181,18 @@ module ICU
120
181
  @comments = ''
121
182
  @results = Array.new
122
183
  krs = ICU::Util.to_utf8(krs) unless arg[:is_utf8]
184
+ lines = get_lines(krs)
123
185
 
124
186
  # Process all lines.
125
- krs.each_line do |line|
126
- @lineno += 1 # increment line number
127
- line.strip! # remove leading and trailing white space
128
- next if line == '' # skip blank lines
129
- @line = line # remember this line for later
187
+ lines.each do |line|
188
+ @lineno += 1 # increment line number
189
+ next if line.match(/^\s*$/) # skip blank lines
190
+ @line = line # remember this line for later
130
191
 
131
192
  # Does it have a DIN or is it just a comment?
132
193
  if @line.match(/^(\d{3}) (.*)$/)
133
- din = $1 # data identification number (DIN)
134
- @data = $2 # the data after the DIN
194
+ din = $1 # data identification number (DIN)
195
+ @data = $2 # the data after the DIN
135
196
  else
136
197
  add_comment
137
198
  next
@@ -210,7 +271,7 @@ module ICU
210
271
  end
211
272
  end
212
273
 
213
- # Serialise a tournament back into Krause format.
274
+ # Serialize a tournament back into Krause format.
214
275
  def serialize(t, arg={})
215
276
  t.validate!(:type => self)
216
277
  krause = ''
@@ -255,11 +316,30 @@ module ICU
255
316
  @tournament.start = @data
256
317
  @start_set = true
257
318
  end
319
+
320
# Split text into lines but also pad the player lines (those beginning "001 ").
#
# Trailing whitespace is removed from every newline-terminated line. Blank lines are
# kept (as empty strings) so that a caller which counts lines stays in step with the
# line numbers of the original text. Player lines are right-padded with spaces to a
# common width: 99 characters or, if any player line is longer than that, 99 plus
# enough multiples of 10 to cover the longest player line.
#
# text:: the complete text to split (String).
#
# Returns an Array of Strings, one per line; trailing empty lines are dropped.
def get_lines(text)
  # Strip trailing blanks without swallowing the newline of a following blank line:
  # [^\S\n] is "any whitespace except newline". Splitting on /\s*\n/ would merge a run
  # of blank lines into a single separator and make line numbers drift.
  lines = text.split(/[^\S\n]*\n/)
  max = 99 # length up to the end of round 1 result, including DIN
  lines.each do |line|
    next unless line.match(/^001 /) && line.length > max
    # increase by multiples of 10, the length of 1 result (including 2-space prefix)
    max += 10 * (1 + (line.length - max - 1) / 10)
  end
  # Pad every player line to the common width; other lines are left alone.
  lines.map { |line| line.match(/^001 /) ? line.ljust(max) : line }
end
258
338
 
259
339
  def add_player(arg={})
260
340
  raise "player record less than minimum length" if @line.length < 99
261
341
 
262
- # Player details.
342
+ # Prepare player details.
263
343
  num = @data[0, 4]
264
344
  nam = @data[10, 32]
265
345
  nams = nam.split(/,/)
@@ -272,27 +352,50 @@ module ICU
272
352
  :dob => @data[65, 10],
273
353
  :rank => @data[81, 4],
274
354
  }
275
- opt[arg[:fide] ? :fide_id : :id] = @data[53, 11]
276
- opt[arg[:fide] ? :fide_rating : :rating] = @data[44, 4]
355
+
356
+ # The IDs and ratings can be local or international.
357
+ itype = arg[:fide] ? :fide_id : :id
358
+ rtype = arg[:fide] ? :fide_rating : :rating
359
+ opt[itype] = @data[53, 11]
360
+ opt[rtype] = @data[44, 4]
361
+
362
+ # Remove obviously bad data.
363
+ opt.delete(itype) if opt.has_key?(itype) && opt[itype].to_i == 0
364
+ opt.delete(rtype) if opt.has_key?(rtype) && opt[rtype].to_i == 0
365
+
366
+ # Options to remove other bad data.
367
+ opt.delete(:fed) if arg[:fed].to_s == 'ignore'
368
+ opt.delete(:fed) if arg[:fed].to_s == 'skip' && !ICU::Federation.find(opt[:fed])
369
+
370
+ # Create the player.
277
371
  player = Player.new(nams.last, nams.first, num, opt)
278
372
  @tournament.add_player(player)
279
373
 
280
374
  # Results.
281
- points = @data[77, 4].strip
282
- points = points == '' ? nil : points.to_f
283
- index = 87
284
- round = 1
285
- total = 0.0
286
- while @data.length >= index + 8
287
- total+= add_result(round, player.num, @data[index, 8])
375
+ total = @data[77, 4].strip
376
+ total = total == '' ? nil : total.to_f
377
+ index = 87
378
+ round = 1
379
+ sum = 0.0
380
+ full_byes = []
381
+ half_byes = []
382
+ while @data.length > index
383
+ sum+= add_result(round, player.num, @data[index, 8], full_byes, half_byes)
288
384
  index+= 10
289
385
  round+= 1
290
386
  end
291
- raise "declared points total (#{points}) does not agree with total from summed results (#{total})" if points && points != total
387
+ if total
388
+ sum = total if total != sum && fix_sum(player.num, full_byes, half_byes, total, sum)
389
+ raise "declared points total (#{total}) does not agree with summed scores (#{sum})" if total != sum
390
+ end
292
391
  end
293
392
 
294
- def add_result(round, player, data)
295
- return 0.0 if data.strip! == '' # no result for this round
393
+ def add_result(round, player, data, full_byes, half_byes)
394
+ data.strip!
395
+ if data.match(/^-?$/)
396
+ full_byes << round
397
+ return 0.0
398
+ end
296
399
  raise "invalid result '#{data}'" unless data.match(/^(0{1,4}|[1-9]\d{0,3}) (w|b|-) (1|0|=|\+|-)$/)
297
400
  opponent = $1.to_i
298
401
  colour = $2
@@ -303,8 +406,41 @@ module ICU
303
406
  options[:rateable] = false unless score.match(/^(1|0|=)$/)
304
407
  result = Result.new(round, player, score, options)
305
408
  @results << [@lineno, player, data, result]
409
+ if opponent == 0
410
+ case score
411
+ when '-' then full_byes << result
412
+ when '=' then half_byes << result
413
+ end
414
+ end
306
415
  result.points
307
416
  end
417
+
418
# See if byes can be used to make the sum of scores match the declared total.
#
# Repair is only attempted when the declared total is higher than the sum and the
# shortfall can be covered by the available slots (up to 1.0 per entry in full_byes,
# up to 0.5 per entry in half_byes). Scores are raised in half-point steps, stopping
# as soon as the sum equals the total.
#
# player::    the player's number, used for any newly created bye result.
# full_byes:: Array whose entries are either Integer round numbers (a bye result is
#             created for that round and stored back into the array) or existing
#             result objects whose score can be assigned. Modified in place.
# half_byes:: Array of existing result objects whose score can be upgraded to 'W'.
# total::     the declared total (Float).
# sum::       the sum of the scores found so far (Float).
#
# Returns true if the sum was brought up to the total, false otherwise. Newly created
# results are also appended to @results.
def fix_sum(player, full_byes, half_byes, total, sum)
  return false unless total > sum
  return false unless total <= sum + full_byes.size * 1.0 + half_byes.size * 0.5
  full_byes.each_index do |i|
    bye = full_byes[i]
    # Integer rather than Fixnum: Fixnum was deprecated in Ruby 2.4 and later removed,
    # while Integer is its superclass on older versions, so this works everywhere.
    if bye.is_a?(Integer)
      # Round number - create a half-point bye in that round.
      result = Result.new(bye, player, '=')
      @results << ['none', player, "extra bye for player #{player} in round #{bye}", result]
      full_byes[i] = result
    else
      # Zero point bye - upgrade to a half point.
      bye.score = 'D'
    end
    sum += 0.5
    return true if total == sum
  end
  (half_byes + full_byes).each do |bye|
    # Upgrade to full point.
    bye.score = 'W'
    sum += 0.5
    return true if total == sum
  end
  false
end
308
444
 
309
445
  def add_team
310
446
  raise error "team record less than minimum length" if @line.length < 40
@@ -319,9 +455,17 @@ module ICU
319
455
 
320
456
  def add_round_dates
321
457
  raise "round dates record less than minimum length" if @line.length < 99
322
- index = 87
458
+ index = 87
459
+ american = nil
323
460
  while @data.length >= index + 8
324
461
  date = @data[index, 8].strip
462
+ # Cope with heinous date formats like yy.dd.mm.
463
+ if date.match((/^(\d{2}).(\d{2}).(\d{2})$/))
464
+ if american.nil?
465
+ american = $2.to_i > 12 || (@tournament.start[5,2] == $3 && @tournament.start[8,2] != $2)
466
+ end
467
+ date = "#{$1}.#{$3}.#{$2}" if american
468
+ end
325
469
  @tournament.add_round_date("20#{date}") unless date == ''
326
470
  index+= 10
327
471
  end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module ICU
4
4
  class Tournament
5
- VERSION = "1.3.6"
5
+ VERSION = "1.3.7"
6
6
  end
7
7
  end
@@ -397,7 +397,7 @@ KRAUSE
397
397
  end
398
398
 
399
399
  it "inconsistent totals should cause an error" do
400
- @k.sub!('4.0', '4.5')
400
+ @k.sub!('3.5', '4.0')
401
401
  lambda { @p.parse!(@k) }.should raise_error(/total/)
402
402
  end
403
403
 
@@ -513,6 +513,138 @@ KRAUSE
513
513
  @t.name.should == "Läs Végas National Opeñ"
514
514
  end
515
515
  end
516
+
517
+ context "automatic repairing of totals" do
518
+ before(:each) do
519
+ @p = ICU::Tournament::Krause.new
520
+ end
521
+
522
+ it "cannot repair mismatched totals if there are no byes" do
523
+ @k = <<KRAUSE
524
+ 012 Las Vegas National Open
525
+ 042 2008-06-07
526
+ 001 1 Ui Laighleis,Gearoidin 0.5 2 b 0 2 w 0
527
+ 001 2 Or,Mark 2.0 1 w 1 1 b 1
528
+ KRAUSE
529
+ lambda { @p.parse!(@k) }.should raise_error(/total/)
530
+ end
531
+
532
+ it "cannot repair mismatched totals if totals are underestimated" do
533
+ @k = <<KRAUSE
534
+ 012 Las Vegas National Open
535
+ 042 2008-06-07
536
+ 001 1 Ui Laighleis,Gearoidin 0.0 2 b 0 0000 - -
537
+ 001 2 Orr,Mark 1.5 1 w 1 0000 - +
538
+ KRAUSE
539
+ lambda { @p.parse!(@k) }.should raise_error(/total/)
540
+ end
541
+
542
+ it "cannot repair overestimated totals if there are not enough byes" do
543
+ @k = <<KRAUSE
544
+ 012 Las Vegas National Open
545
+ 042 2008-06-07
546
+ 001 1 Ui Laighleis,Gearoidin 1.5 2 b 0 0000 - -
547
+ 001 2 Orr,Mark 2.0 1 w 1 0000 - +
548
+ KRAUSE
549
+ lambda { @p.parse!(@k) }.should raise_error(/total/)
550
+ end
551
+
552
+ it "can repair overestimated totals if there are enough byes" do
553
+ @k = <<KRAUSE
554
+ 012 Las Vegas National Open
555
+ 042 2008-06-07
556
+ 001 1 Ui Laighleis,Gearoidin 1.0 2 b 0 0000 - -
557
+ 001 2 ORR,Mark 2.0 1 w 1 0000 - +
558
+ KRAUSE
559
+ @t = @p.parse!(@k)
560
+ @t.should_not be_nil
561
+ check_results(1, 2, 1.0)
562
+ @t.player(1).find_result(2).score.should == 'W'
563
+ end
564
+
565
+ it "extreme example" do
566
+ @k = <<KRAUSE
567
+ 012 Las Vegas National Open
568
+ 042 2008-06-07
569
+ 001 1 Ui Laighleis,Gearoidin 2.0 2 b 0 0000 - - 0000 - =
570
+ 001 2 Orr,Mark 2.5 1 w 1 0000 - +
571
+ 001 3 Brady,Stephen 1.0 0000 - - 4 b 0 0000 - =
572
+ 001 4 Knox,Angela 2.5 0000 - - 3 w 1 0000 - -
573
+ KRAUSE
574
+ @t = @p.parse!(@k)
575
+ @t.should_not be_nil
576
+ @t.player(1).results.map(&:score).join('').should == 'LWW'
577
+ @t.player(2).results.map(&:score).join('').should == 'WWD'
578
+ @t.player(3).results.map(&:score).join('').should == 'DLD'
579
+ @t.player(4).results.map(&:score).join('').should == 'WWD'
580
+ end
581
+
582
+ it "should work on the documentation example" do
583
+ @k = <<KRAUSE
584
+ 012 Mismatched Totals
585
+ 042 2011-03-04
586
+ 001 1 Mouse,Minerva 1.0 2 2 b 0 0000 - =
587
+ 001 2 Mouse,Mickey 1.5 1 1 w 1
588
+ KRAUSE
589
+ @t = @p.parse!(@k)
590
+ output = <<KRAUSE
591
+ 012 Mismatched Totals
592
+ 042 2011-03-04
593
+ 001 1 Mouse,Minerva 1.0 2 2 b 0 0000 - +
594
+ 001 2 Mouse,Mickey 1.5 1 1 w 1 0000 - =
595
+ KRAUSE
596
+ @t.serialize('Krause').should == output
597
+ end
598
+ end
599
+
600
+ context "parsing variations on strict Krause" do
601
+ before(:each) do
602
+ @p = ICU::Tournament::Krause.new
603
+ @s = File.dirname(__FILE__) + '/samples/krause'
604
+ end
605
+
606
+ it "should handle Bunratty Masters 2011" do
607
+ file = "#{@s}/bunratty_masters_2011.tab"
608
+ @t = @p.parse_file(file, :fed => :skip, :fide => true)
609
+ @t.should_not be_nil
610
+ @t.start.should == "2011-02-25"
611
+ @t.finish.should == "2011-02-27"
612
+ check_player(1, 'Nigel', 'Short', :gender => 'M', :fide_rating => 2658, :fed => 'ENG', :rating => nil, :rank => 5, :title => 'GM')
613
+ check_results(1, 6, 4.0)
614
+ check_player(16, 'Jonathan', "O'Connor", :gender => 'M', :fide_rating => 2111, :fed => nil, :rating => nil, :rank => 25, :title => nil)
615
+ check_results(16, 6, 2.5)
616
+ @t.player(16).results.map(&:score).join('').should == 'DWLDDL'
617
+ check_player(24, 'David', 'Murray', :gender => 'M', :fide_rating => 2023, :fed => nil, :rating => nil, :rank => 34, :title => nil)
618
+ check_results(24, 2, 0.5)
619
+ @t.player(24).results.map(&:score).join('').should == 'LD'
620
+ check_player(26, 'Alexandra', 'Wilson', :gender => 'F', :fide_rating => 2020, :fed => 'ENG', :rating => nil, :rank => 29, :title => 'WFM')
621
+ check_results(26, 6, 2.0)
622
+ end
623
+
624
+ it "should handle Bunratty Major 2011" do
625
+ file = "#{@s}/bunratty_major_2011.tab"
626
+ @t = @p.parse_file(file, :fed => :ignore)
627
+ @t.should_not be_nil
628
+ @t.start.should == "2011-02-25"
629
+ @t.finish.should == "2011-02-27"
630
+ check_player(1, 'Dan', 'Clancy', :gender => 'M', :fide_rating => nil, :fed => nil, :id => 204, :rating => nil, :rank => 12)
631
+ check_results(1, 6, 4)
632
+ check_player(10, 'Phillip', 'Foenander', :gender => 'M', :fide_rating => nil, :fed => nil, :id => 7168, :rating => nil, :rank => 18)
633
+ check_results(10, 6, 3.5)
634
+ check_player(40, 'Ron', 'Cummins', :gender => 'M', :fide_rating => nil, :fed => nil, :id => 4610, :rating => nil, :rank => 56)
635
+ check_results(40, 1, 0.0)
636
+ end
637
+
638
+ it "should handle bunratty_minor_2011.tab" do
639
+ file = "#{@s}/bunratty_minor_2011.tab"
640
+ lambda { @p.parse_file!(file, :fed => :ignore) }.should_not raise_error
641
+ end
642
+
643
+ it "should handle Bunratty Challengers 2011" do
644
+ file = "#{@s}/bunratty_challengers_2011.tab"
645
+ lambda { @p.parse_file!(file, :fed => :ignore) }.should_not raise_error
646
+ end
647
+ end
516
648
  end
517
649
  end
518
650
  end
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 1
7
7
  - 3
8
- - 6
9
- version: 1.3.6
8
+ - 7
9
+ version: 1.3.7
10
10
  platform: ruby
11
11
  authors:
12
12
  - Mark Orr
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2011-02-27 00:00:00 +00:00
17
+ date: 2011-03-04 00:00:00 +00:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency