sportdb-parser 0.0.1 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +2 -0
- data/Manifest.txt +2 -0
- data/Rakefile +6 -2
- data/bin/fbt +9 -58
- data/lib/sportdb/parser/linter.rb +4 -8
- data/lib/sportdb/parser/opts.rb +70 -0
- data/lib/sportdb/parser/outline_reader.rb +1 -1
- data/lib/sportdb/parser/token-date.rb +19 -17
- data/lib/sportdb/parser/version.rb +24 -0
- data/lib/sportdb/parser.rb +7 -3
- metadata +32 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e5af84e3a141fc577287c8c788eb27a79bf1fc78ed0c08e80df6004383788b66
|
4
|
+
data.tar.gz: 0e23fca8e4566021eb220d20925f97694fcc5b8b7c165c6ce469b5f08feb9cc1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b6434b5d4df17e72a83f9b63ceef117ddfe50157073cbe4657e6e47e8aa820e8aaf986030642fc86160fd9c551cc55c9e9a35187cf09de3e0c346a00d1f58f17
|
7
|
+
data.tar.gz: dc9b9fd5c782409c019aa2de0d4aea5bdeb90a7a4e01c83ed58b08d4315f1a87ae84268f269d20a10463567633471e1b05052c24bfa47bc49e988bb927e2f927
|
data/CHANGELOG.md
CHANGED
data/Manifest.txt
CHANGED
@@ -6,9 +6,11 @@ bin/fbt
|
|
6
6
|
lib/sportdb/parser.rb
|
7
7
|
lib/sportdb/parser/lang.rb
|
8
8
|
lib/sportdb/parser/linter.rb
|
9
|
+
lib/sportdb/parser/opts.rb
|
9
10
|
lib/sportdb/parser/outline_reader.rb
|
10
11
|
lib/sportdb/parser/parser.rb
|
11
12
|
lib/sportdb/parser/token-date.rb
|
12
13
|
lib/sportdb/parser/token-score.rb
|
13
14
|
lib/sportdb/parser/token-text.rb
|
14
15
|
lib/sportdb/parser/token.rb
|
16
|
+
lib/sportdb/parser/version.rb
|
data/Rakefile
CHANGED
@@ -1,9 +1,10 @@
|
|
1
1
|
require 'hoe'
|
2
|
+
require './lib/sportdb/parser/version.rb'
|
2
3
|
|
3
4
|
|
4
5
|
Hoe.spec 'sportdb-parser' do
|
5
6
|
|
6
|
-
self.version =
|
7
|
+
self.version = SportDb::Module::Parser::VERSION
|
7
8
|
|
8
9
|
self.summary = "sportdb-parser - football.txt match parser (& tokenizer)"
|
9
10
|
self.description = summary
|
@@ -19,7 +20,10 @@ Hoe.spec 'sportdb-parser' do
|
|
19
20
|
|
20
21
|
self.licenses = ['Public Domain']
|
21
22
|
|
22
|
-
self.extra_deps = [
|
23
|
+
self.extra_deps = [
|
24
|
+
['cocos'],
|
25
|
+
['season-formats'],
|
26
|
+
]
|
23
27
|
|
24
28
|
self.spec_extras = {
|
25
29
|
required_ruby_version: '>= 2.2.2'
|
data/bin/fbt
CHANGED
@@ -3,9 +3,11 @@
|
|
3
3
|
## tip: to test run:
|
4
4
|
## ruby -I ./lib bin/fbt
|
5
5
|
|
6
|
+
## our own code
|
6
7
|
require 'sportdb/parser'
|
7
8
|
|
8
9
|
|
10
|
+
|
9
11
|
require 'optparse'
|
10
12
|
|
11
13
|
##
|
@@ -15,40 +17,6 @@ require 'optparse'
|
|
15
17
|
## fbt ../openfootball/.../euro.txt
|
16
18
|
|
17
19
|
|
18
|
-
SEASON_RE = %r{ (?:
|
19
|
-
\d{4}-\d{2}
|
20
|
-
| \d{4}(--[a-z0-9_-]+)?
|
21
|
-
)
|
22
|
-
}x
|
23
|
-
SEASON = SEASON_RE.source ## "inline" helper for embedding in other regexes - keep? why? why not?
|
24
|
-
|
25
|
-
|
26
|
-
## note: if pattern includes directory add here
|
27
|
-
## (otherwise move to more "generic" datafile) - why? why not?
|
28
|
-
MATCH_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
29
|
-
#{SEASON}
|
30
|
-
/[a-z0-9_-]+\.txt$ ## txt e.g /1-premierleague.txt
|
31
|
-
}x
|
32
|
-
|
33
|
-
|
34
|
-
def find( path, pattern=MATCH_RE )
|
35
|
-
datafiles = []
|
36
|
-
|
37
|
-
## check all txt files
|
38
|
-
## note: incl. files starting with dot (.)) as candidates (normally excluded with just *)
|
39
|
-
candidates = Dir.glob( "#{path}/**/{*,.*}.txt" )
|
40
|
-
## pp candidates
|
41
|
-
candidates.each do |candidate|
|
42
|
-
datafiles << candidate if pattern.match( candidate )
|
43
|
-
end
|
44
|
-
|
45
|
-
## pp datafiles
|
46
|
-
datafiles
|
47
|
-
end
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
20
|
|
53
21
|
|
54
22
|
args = ARGV
|
@@ -86,25 +54,6 @@ p args
|
|
86
54
|
|
87
55
|
|
88
56
|
|
89
|
-
def expand_args( args )
|
90
|
-
paths = []
|
91
|
-
|
92
|
-
args.each do |arg|
|
93
|
-
## check if directory
|
94
|
-
if Dir.exist?( arg )
|
95
|
-
datafiles = find( arg )
|
96
|
-
puts
|
97
|
-
puts " found #{datafiles.size} match txt datafiles in #{arg}"
|
98
|
-
pp datafiles
|
99
|
-
paths += datafiles
|
100
|
-
else
|
101
|
-
## assume it's a file
|
102
|
-
paths << arg
|
103
|
-
end
|
104
|
-
end
|
105
|
-
|
106
|
-
paths
|
107
|
-
end
|
108
57
|
|
109
58
|
|
110
59
|
paths = if args.empty?
|
@@ -116,7 +65,7 @@ paths = if args.empty?
|
|
116
65
|
## check for directories
|
117
66
|
## and auto-expand
|
118
67
|
|
119
|
-
expand_args( args )
|
68
|
+
SportDb::Parser::Opts.expand_args( args )
|
120
69
|
end
|
121
70
|
|
122
71
|
|
@@ -125,17 +74,19 @@ SportDb::Parser::Linter.debug = true if opts[:debug]
|
|
125
74
|
|
126
75
|
linter = SportDb::Parser::Linter.new
|
127
76
|
|
128
|
-
|
77
|
+
errors = []
|
129
78
|
|
130
79
|
paths.each_with_index do |path,i|
|
131
80
|
puts "==> [#{i+1}/#{paths.size}] reading >#{path}<..."
|
132
81
|
linter.read( path, parse: !opts[:metal] )
|
82
|
+
|
83
|
+
errors += linter.errors if linter.errors?
|
133
84
|
end
|
134
85
|
|
135
|
-
if
|
86
|
+
if errors.size > 0
|
136
87
|
puts
|
137
|
-
pp
|
138
|
-
puts "!! #{
|
88
|
+
pp errors
|
89
|
+
puts "!! #{errors.size} parse error(s) in #{paths.size} datafiles(s)"
|
139
90
|
else
|
140
91
|
puts "OK no parse errors found in #{paths.size} datafile(s)"
|
141
92
|
end
|
@@ -10,12 +10,6 @@ def self.debug=(value) @@debug = value; end
|
|
10
10
|
def self.debug?() @@debug ||= false; end ## note: default is FALSE
|
11
11
|
def debug?() self.class.debug?; end
|
12
12
|
|
13
|
-
## keep typed - why? why not?
|
14
|
-
## - used anywhere?
|
15
|
-
def self.typed=(value) @@typed = value; end
|
16
|
-
def self.typed?() @@typed ||= true; end ## note: default is TRUE
|
17
|
-
def typed?() self.class.typed?; end
|
18
|
-
|
19
13
|
|
20
14
|
|
21
15
|
attr_reader :errors
|
@@ -58,6 +52,9 @@ def errors?() @errors.size > 0; end
|
|
58
52
|
## parse - false (default) - tokenize (only)
|
59
53
|
## - true - tokenize & parse
|
60
54
|
def read( path, parse: false )
|
55
|
+
## note: every (new) read call - resets errors list to empty
|
56
|
+
@errors = []
|
57
|
+
|
61
58
|
nodes = OutlineReader.read( path )
|
62
59
|
|
63
60
|
## process nodes
|
@@ -94,9 +91,8 @@ def read( path, parse: false )
|
|
94
91
|
|
95
92
|
|
96
93
|
## skip new (experimental attrib syntax)
|
97
|
-
m = nil
|
98
94
|
if attrib_found == false &&
|
99
|
-
|
95
|
+
ATTRIB_RE.match?( line )
|
100
96
|
## note: check attrib regex AFTER group def e.g.:
|
101
97
|
## Group A:
|
102
98
|
## Group B: etc.
|
@@ -0,0 +1,70 @@
|
|
1
|
+
|
2
|
+
module SportDb
|
3
|
+
class Parser
|
4
|
+
|
5
|
+
###
|
6
|
+
## note - Opts Helpers for now nested inside Parser - keep here? why? why not?
|
7
|
+
class Opts
|
8
|
+
|
9
|
+
SEASON_RE = %r{ (?:
|
10
|
+
\d{4}-\d{2}
|
11
|
+
| \d{4}(--[a-z0-9_-]+)?
|
12
|
+
)
|
13
|
+
}x
|
14
|
+
SEASON = SEASON_RE.source ## "inline" helper for embedding in other regexes - keep? why? why not?
|
15
|
+
|
16
|
+
|
17
|
+
## note: if pattern includes directory add here
|
18
|
+
## (otherwise move to more "generic" datafile) - why? why not?
|
19
|
+
MATCH_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
20
|
+
#{SEASON}
|
21
|
+
/[a-z0-9_-]+\.txt$ ## txt e.g /1-premierleague.txt
|
22
|
+
}x
|
23
|
+
|
24
|
+
|
25
|
+
def self.find( path )
|
26
|
+
datafiles = []
|
27
|
+
|
28
|
+
## note: normalize path - use File.expand_path ??
|
29
|
+
## change all backslash to slash for now
|
30
|
+
## path = path.gsub( "\\", '/' )
|
31
|
+
path = File.expand_path( path )
|
32
|
+
|
33
|
+
## check all txt files
|
34
|
+
## note: incl. files starting with dot (.)) as candidates
|
35
|
+
## (normally excluded with just *)
|
36
|
+
candidates = Dir.glob( "#{path}/**/{*,.*}.txt" )
|
37
|
+
## pp candidates
|
38
|
+
candidates.each do |candidate|
|
39
|
+
datafiles << candidate if MATCH_RE.match( candidate )
|
40
|
+
end
|
41
|
+
|
42
|
+
## pp datafiles
|
43
|
+
datafiles
|
44
|
+
end
|
45
|
+
|
46
|
+
|
47
|
+
def self.expand_args( args )
|
48
|
+
paths = []
|
49
|
+
|
50
|
+
args.each do |arg|
|
51
|
+
## check if directory
|
52
|
+
if Dir.exist?( arg )
|
53
|
+
datafiles = find( arg )
|
54
|
+
puts
|
55
|
+
puts " found #{datafiles.size} match txt datafiles in #{arg}"
|
56
|
+
pp datafiles
|
57
|
+
paths += datafiles
|
58
|
+
else
|
59
|
+
## assume it's a file
|
60
|
+
paths << arg
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
paths
|
65
|
+
end
|
66
|
+
end # class Opts
|
67
|
+
|
68
|
+
|
69
|
+
end # class Parser
|
70
|
+
end # module SportDb
|
@@ -73,7 +73,7 @@ class OutlineReader
|
|
73
73
|
start_para = true
|
74
74
|
|
75
75
|
heading_marker = m[:marker]
|
76
|
-
heading_level =
|
76
|
+
heading_level = heading_marker.length ## count number of = for heading level
|
77
77
|
heading = m[:text].strip
|
78
78
|
|
79
79
|
puts "heading #{heading_level} >#{heading}<" if debug?
|
@@ -37,22 +37,24 @@ def self.build_names( lines )
|
|
37
37
|
end
|
38
38
|
|
39
39
|
|
40
|
+
def self.build_map( lines, downcase: false )
|
41
|
+
## note: downcase name!!!
|
42
|
+
## build a lookup map that maps the word to the index (line no) plus 1 e.g.
|
43
|
+
## {"january" => 1, "jan" => 1,
|
44
|
+
## "february" => 2, "feb" => 2,
|
45
|
+
## "march" => 3, "mar" => 3,
|
46
|
+
## "april" => 4, "apr" => 4,
|
47
|
+
## "may" => 5,
|
48
|
+
## "june" => 6, "jun" => 6, ...
|
49
|
+
lines.each_with_index.reduce( {} ) do |h,(line,i)|
|
50
|
+
line.each do |name|
|
51
|
+
h[ downcase ? name.downcase : name ] = i+1
|
52
|
+
end ## note: start mapping with 1 (and NOT zero-based, that is, 0)
|
53
|
+
h
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
40
57
|
|
41
|
-
## add normalize option (for downcase) - why? why not?
|
42
|
-
def self.build_map( lines )
|
43
|
-
## note: downcase name!!!
|
44
|
-
## build a lookup map that maps the word to the index (line no) plus 1 e.g.
|
45
|
-
## {"january" => 1, "jan" => 1,
|
46
|
-
## "february" => 2, "feb" => 2,
|
47
|
-
## "march" => 3, "mar" => 3,
|
48
|
-
## "april" => 4, "apr" => 4,
|
49
|
-
## "may" => 5,
|
50
|
-
## "june" => 6, "jun" => 6, ...
|
51
|
-
lines.each_with_index.reduce( {} ) do |h,(line,i)|
|
52
|
-
line.each { |name| h[ name.downcase ] = i+1 } ## note: start mapping with 1 (and NOT zero-based, that is, 0)
|
53
|
-
h
|
54
|
-
end
|
55
|
-
end
|
56
58
|
|
57
59
|
|
58
60
|
MONTH_LINES = parse_names( <<TXT )
|
@@ -72,7 +74,7 @@ TXT
|
|
72
74
|
|
73
75
|
MONTH_NAMES = build_names( MONTH_LINES )
|
74
76
|
# pp MONTH_NAMES
|
75
|
-
MONTH_MAP = build_map( MONTH_LINES )
|
77
|
+
MONTH_MAP = build_map( MONTH_LINES, downcase: true )
|
76
78
|
# pp MONTH_MAP
|
77
79
|
|
78
80
|
|
@@ -89,7 +91,7 @@ TXT
|
|
89
91
|
|
90
92
|
DAY_NAMES = build_names( DAY_LINES )
|
91
93
|
# pp DAY_NAMES
|
92
|
-
DAY_MAP = build_map( DAY_LINES )
|
94
|
+
DAY_MAP = build_map( DAY_LINES, downcase: true )
|
93
95
|
# pp DAY_MAP
|
94
96
|
|
95
97
|
|
@@ -0,0 +1,24 @@
|
|
1
|
+
|
2
|
+
module SportDb
|
3
|
+
module Module
|
4
|
+
module Parser
|
5
|
+
MAJOR = 0 ## todo: namespace inside version or something - why? why not??
|
6
|
+
MINOR = 1
|
7
|
+
PATCH = 0
|
8
|
+
VERSION = [MAJOR,MINOR,PATCH].join('.')
|
9
|
+
|
10
|
+
def self.version
|
11
|
+
VERSION
|
12
|
+
end
|
13
|
+
|
14
|
+
def self.banner
|
15
|
+
"sportdb-parser/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}] in (#{root})"
|
16
|
+
end
|
17
|
+
|
18
|
+
def self.root
|
19
|
+
File.expand_path( File.dirname(File.dirname(File.dirname(File.dirname(__FILE__)))) )
|
20
|
+
end
|
21
|
+
|
22
|
+
end # module Parser
|
23
|
+
end
|
24
|
+
end
|
data/lib/sportdb/parser.rb
CHANGED
@@ -1,3 +1,7 @@
|
|
1
|
+
## pulls in
|
2
|
+
require 'cocos'
|
3
|
+
require 'season/formats' # e.g. Season() support machinery
|
4
|
+
|
1
5
|
|
2
6
|
|
3
7
|
####
|
@@ -11,7 +15,7 @@
|
|
11
15
|
## text - change text to name - why? why not?
|
12
16
|
|
13
17
|
|
14
|
-
|
18
|
+
require_relative 'parser/version'
|
15
19
|
require_relative 'parser/token-score'
|
16
20
|
require_relative 'parser/token-date'
|
17
21
|
require_relative 'parser/token-text'
|
@@ -23,6 +27,7 @@ require_relative 'parser/parser'
|
|
23
27
|
## more
|
24
28
|
require_relative 'parser/outline_reader'
|
25
29
|
require_relative 'parser/linter'
|
30
|
+
require_relative 'parser/opts'
|
26
31
|
|
27
32
|
|
28
33
|
###
|
@@ -39,6 +44,5 @@ end # module SportDb
|
|
39
44
|
=end
|
40
45
|
|
41
46
|
|
42
|
-
|
43
|
-
|
47
|
+
puts SportDb::Module::Parser.banner # say hello
|
44
48
|
|
metadata
CHANGED
@@ -1,15 +1,43 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sportdb-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-07-
|
11
|
+
date: 2024-07-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: cocos
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: season-formats
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
13
41
|
- !ruby/object:Gem::Dependency
|
14
42
|
name: rdoc
|
15
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -62,12 +90,14 @@ files:
|
|
62
90
|
- lib/sportdb/parser.rb
|
63
91
|
- lib/sportdb/parser/lang.rb
|
64
92
|
- lib/sportdb/parser/linter.rb
|
93
|
+
- lib/sportdb/parser/opts.rb
|
65
94
|
- lib/sportdb/parser/outline_reader.rb
|
66
95
|
- lib/sportdb/parser/parser.rb
|
67
96
|
- lib/sportdb/parser/token-date.rb
|
68
97
|
- lib/sportdb/parser/token-score.rb
|
69
98
|
- lib/sportdb/parser/token-text.rb
|
70
99
|
- lib/sportdb/parser/token.rb
|
100
|
+
- lib/sportdb/parser/version.rb
|
71
101
|
homepage: https://github.com/sportdb/sport.db
|
72
102
|
licenses:
|
73
103
|
- Public Domain
|