sportdb-formats 2.0.1 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,108 +0,0 @@
1
- # encoding: utf-8
2
-
3
-
4
module SportDb
  module Import

    ## Parses a simple line-based text format into a list of WikiClub records.
    ##
    ## Format (as handled by #parse):
    ##   - blank lines and lines starting with # are skipped
    ##   - inline comments (from # until end-of-line) are stripped
    ##   - "decorative" lines made up of = only (e.g. ========) are skipped
    ##   - a level-one heading (e.g. "= Austria =" or "= Austria") sets the
    ##     country for all following lines
    ##   - every other line is a club name recorded under the last country heading
    ##
    ## Errors (heading level too deep, unknown country, club name without
    ## country heading) print a message to stdout and terminate via exit 1.
    class WikiReader ## todo/check: rename to WikiClubReader - why? why not?

      ## Record for one club name line plus the country of the heading above it.
      class WikiClub # nested class
        attr_reader :name, :country

        def initialize( name, country )
          @name, @country = name, country
        end
      end # (nested) class WikiClub

      ## "decorative" only heading e.g. ========
      DECORATIVE_HEADING_RE = /^={1,}$/

      ## note: like in wikimedia markup (and markdown) all optional trailing ==== too
      ## todo/check: allow === Text =-=-=-=-=-= too - why? why not?
      HEADING_RE = /^(={1,})   ## leading ======
                    ([^=]+?)   ## text (note: for now no "inline" = allowed)
                    =*         ## (optional) trailing ====
                    $/x

      ## Debug tracing is opt-in (off by default) so that parsing does not
      ## write every line to stdout; turn on with WikiReader.debug = true.
      def self.debug=( value ) @debug = value; end
      def self.debug?() @debug ||= false; end
      def debug?() self.class.debug?; end

      ## Shortcut to the world object provided by Import.world
      ## (used for looking up countries from heading text).
      def world() Import.world; end

      ## Reads the file at path (as utf-8) and parses its content.
      ## Returns an array of WikiClub records.
      def self.read( path ) ## use - rename to read_file or from_file etc. - why? why not?
        txt = File.open( path, 'r:utf-8' ) { |f| f.read }
        parse( txt )
      end

      ## Parses the passed-in text. Returns an array of WikiClub records.
      def self.parse( txt )
        new( txt ).parse
      end

      def initialize( txt )
        @txt = txt
      end

      ## Walks the text line-by-line and collects one WikiClub per club name line.
      ## Returns the array of records (empty if no club name lines are found).
      def parse
        recs = []
        last_country = nil ## note: supports only one level of headings for now (and that is a country)

        @txt.each_line do |line|
          line = line.strip

          next if line.empty?
          next if line.start_with?( '#' ) ## skip comments too

          ## strip inline (until end-of-line) comments too
          ##  e.g Eupen => KAS Eupen, ## [de]
          ##   => Eupen => KAS Eupen,
          line = line.sub( /#.*/, '' ).strip
          pp line if debug?

          next if line =~ DECORATIVE_HEADING_RE

          if (m = HEADING_RE.match( line ))
            ## number of leading = is the heading level
            last_country = country_for_heading( m[2].strip, m[1].length )
            pp last_country if debug?
          else
            if last_country.nil?
              puts "** !!! ERROR [wiki reader] !!! - country heading missing for club name; sorry - add country heading to fix"
              exit 1
            end

            recs << WikiClub.new( normalize_name( line ), last_country )
          end
        end # each_line
        recs
      end # method parse

      private

      ## Resolves a heading to a country; exits (status 1) if the heading
      ## level is deeper than one or the country lookup returns nil.
      def country_for_heading( heading, heading_level )
        puts "heading #{heading_level} >#{heading}<" if debug?

        if heading_level > 1
          puts "** !!! ERROR [wiki reader] !!! - - headings level too deep - only top / one level supported for now; sorry"
          exit 1
        end

        ## assume country in heading; allow all "formats" supported by parse e.g.
        ##   Österreich • Austria (at)
        ##   Österreich • Austria
        ##   Austria
        ##   Deutschland (de) • Germany
        country = world.countries.parse( heading )
        ## check country code - MUST exist for now!!!!
        if country.nil?
          puts "!!! error [wiki reader] - unknown country >#{heading}< - sorry - add country to config to fix"
          exit 1
        end

        country
      end

      ## Normalizes a club name line:
      ##   1) replace underscores (_) with spaces
      ##        e.g. Cercle_Brugge_K.S.V.  =>  Cercle Brugge K.S.V.
      ##   2) squish runs of spaces/tabs and strip
      ##        e.g. New York FC      (2011-)  =>  New York FC (2011-)
      ## Underscores get replaced first, so spaces introduced by the
      ## replacement (e.g. "a__b" or a trailing "_") get squished/stripped too.
      def normalize_name( line )
        line.tr( '_', ' ' ).gsub( /[ \t]+/, ' ' ).strip
      end

    end # class WikiReader

  end ## module Import
end ## module SportDb