sportdb-formats 2.0.1 → 2.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,108 +0,0 @@
1
- # encoding: utf-8
2
-
3
-
4
- module SportDb
5
- module Import
6
-
7
-
8
- class WikiReader ## todo/check: rename to WikiClubReader - why? why not?
9
-
10
- class WikiClub # nested class
11
- attr_reader :name, :country
12
- def initialize( name, country )
13
- @name, @country = name, country
14
- end
15
- end # (nested) class WikiClub
16
-
17
-
18
- def world() Import.world; end
19
-
20
-
21
- def self.read( path ) ## use - rename to read_file or from_file etc. - why? why not?
22
- txt = File.open( path, 'r:utf-8' ) { |f| f.read }
23
- parse( txt )
24
- end
25
-
26
- def self.parse( txt )
27
- new( txt ).parse
28
- end
29
-
30
- def initialize( txt )
31
- @txt = txt
32
- end
33
-
34
- def parse
35
- recs = []
36
- last_country = nil ## note: supports only one level of headings for now (and that is a country)
37
-
38
- @txt.each_line do |line|
39
- line = line.strip
40
-
41
- next if line.empty?
42
- next if line.start_with?( '#' ) ## skip comments too
43
-
44
- ## strip inline (until end-of-line) comments too
45
- ## e.g Eupen => KAS Eupen, ## [de]
46
- ## => Eupen => KAS Eupen,
47
- line = line.sub( /#.*/, '' ).strip
48
- pp line
49
-
50
-
51
- next if line =~ /^={1,}$/ ## skip "decorative" only heading e.g. ========
52
-
53
- ## note: like in wikimedia markup (and markdown) all optional trailing ==== too
54
- ## todo/check: allow === Text =-=-=-=-=-= too - why? why not?
55
- if line =~ /^(={1,}) ## leading ======
56
- ([^=]+?) ## text (note: for now no "inline" = allowed)
57
- =* ## (optional) trailing ====
58
- $/x
59
- heading_marker = $1
60
- heading_level = $1.length ## count number of = for heading level
61
- heading = $2.strip
62
-
63
- puts "heading #{heading_level} >#{heading}<"
64
-
65
- if heading_level > 1
66
- puts "** !!! ERROR [wiki reader] !!! - - headings level too deep - only top / one level supported for now; sorry"
67
- exit 1
68
- end
69
-
70
- ## assume country in heading; allow all "formats" supported by parse e.g.
71
- ## Österreich • Austria (at)
72
- ## Österreich • Austria
73
- ## Austria
74
- ## Deutschland (de) • Germany
75
- country = world.countries.parse( heading )
76
- ## check country code - MUST exist for now!!!!
77
- if country.nil?
78
- puts "!!! error [wiki reader] - unknown country >#{heading}< - sorry - add country to config to fix"
79
- exit 1
80
- end
81
-
82
- last_country = country
83
- pp last_country
84
- else
85
- ## strip and squish (white)spaces
86
- # e.g. New York FC (2011-) => New York FC (2011-)
87
- value = line.strip.gsub( /[ \t]+/, ' ' )
88
-
89
- ## normalize (allow underscore (-) - replace with space)
90
- ## e.g. Cercle_Brugge_K.S.V. => Cercle Brugge K.S.V.
91
- value = value.gsub( '_', ' ' )
92
-
93
- if last_country.nil?
94
- puts "** !!! ERROR [wiki reader] !!! - country heading missing for club name; sorry - add country heading to fix"
95
- exit 1
96
- end
97
-
98
- rec = WikiClub.new( value, last_country )
99
- recs << rec
100
- end
101
- end # each_line
102
- recs
103
- end # method read
104
-
105
- end # class WikiReader
106
-
107
- end ## module Import
108
- end ## module SportDb