sportdb-parser 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +2 -0
- data/Manifest.txt +2 -0
- data/Rakefile +6 -2
- data/bin/fbt +9 -58
- data/lib/sportdb/parser/linter.rb +4 -8
- data/lib/sportdb/parser/opts.rb +70 -0
- data/lib/sportdb/parser/outline_reader.rb +1 -1
- data/lib/sportdb/parser/token-date.rb +19 -17
- data/lib/sportdb/parser/version.rb +24 -0
- data/lib/sportdb/parser.rb +7 -3
- metadata +32 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e5af84e3a141fc577287c8c788eb27a79bf1fc78ed0c08e80df6004383788b66
|
4
|
+
data.tar.gz: 0e23fca8e4566021eb220d20925f97694fcc5b8b7c165c6ce469b5f08feb9cc1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b6434b5d4df17e72a83f9b63ceef117ddfe50157073cbe4657e6e47e8aa820e8aaf986030642fc86160fd9c551cc55c9e9a35187cf09de3e0c346a00d1f58f17
|
7
|
+
data.tar.gz: dc9b9fd5c782409c019aa2de0d4aea5bdeb90a7a4e01c83ed58b08d4315f1a87ae84268f269d20a10463567633471e1b05052c24bfa47bc49e988bb927e2f927
|
data/CHANGELOG.md
CHANGED
data/Manifest.txt
CHANGED
@@ -6,9 +6,11 @@ bin/fbt
|
|
6
6
|
lib/sportdb/parser.rb
|
7
7
|
lib/sportdb/parser/lang.rb
|
8
8
|
lib/sportdb/parser/linter.rb
|
9
|
+
lib/sportdb/parser/opts.rb
|
9
10
|
lib/sportdb/parser/outline_reader.rb
|
10
11
|
lib/sportdb/parser/parser.rb
|
11
12
|
lib/sportdb/parser/token-date.rb
|
12
13
|
lib/sportdb/parser/token-score.rb
|
13
14
|
lib/sportdb/parser/token-text.rb
|
14
15
|
lib/sportdb/parser/token.rb
|
16
|
+
lib/sportdb/parser/version.rb
|
data/Rakefile
CHANGED
@@ -1,9 +1,10 @@
|
|
1
1
|
require 'hoe'
|
2
|
+
require './lib/sportdb/parser/version.rb'
|
2
3
|
|
3
4
|
|
4
5
|
Hoe.spec 'sportdb-parser' do
|
5
6
|
|
6
|
-
self.version =
|
7
|
+
self.version = SportDb::Module::Parser::VERSION
|
7
8
|
|
8
9
|
self.summary = "sportdb-parser - football.txt match parser (& tokenizer)"
|
9
10
|
self.description = summary
|
@@ -19,7 +20,10 @@ Hoe.spec 'sportdb-parser' do
|
|
19
20
|
|
20
21
|
self.licenses = ['Public Domain']
|
21
22
|
|
22
|
-
self.extra_deps = [
|
23
|
+
self.extra_deps = [
|
24
|
+
['cocos'],
|
25
|
+
['season-formats'],
|
26
|
+
]
|
23
27
|
|
24
28
|
self.spec_extras = {
|
25
29
|
required_ruby_version: '>= 2.2.2'
|
data/bin/fbt
CHANGED
@@ -3,9 +3,11 @@
|
|
3
3
|
## tip: to test run:
|
4
4
|
## ruby -I ./lib bin/fbt
|
5
5
|
|
6
|
+
## our own code
|
6
7
|
require 'sportdb/parser'
|
7
8
|
|
8
9
|
|
10
|
+
|
9
11
|
require 'optparse'
|
10
12
|
|
11
13
|
##
|
@@ -15,40 +17,6 @@ require 'optparse'
|
|
15
17
|
## fbt ../openfootball/.../euro.txt
|
16
18
|
|
17
19
|
|
18
|
-
SEASON_RE = %r{ (?:
|
19
|
-
\d{4}-\d{2}
|
20
|
-
| \d{4}(--[a-z0-9_-]+)?
|
21
|
-
)
|
22
|
-
}x
|
23
|
-
SEASON = SEASON_RE.source ## "inline" helper for embedding in other regexes - keep? why? why not?
|
24
|
-
|
25
|
-
|
26
|
-
## note: if pattern includes directory add here
|
27
|
-
## (otherwise move to more "generic" datafile) - why? why not?
|
28
|
-
MATCH_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
29
|
-
#{SEASON}
|
30
|
-
/[a-z0-9_-]+\.txt$ ## txt e.g /1-premierleague.txt
|
31
|
-
}x
|
32
|
-
|
33
|
-
|
34
|
-
def find( path, pattern=MATCH_RE )
|
35
|
-
datafiles = []
|
36
|
-
|
37
|
-
## check all txt files
|
38
|
-
## note: incl. files starting with dot (.)) as candidates (normally excluded with just *)
|
39
|
-
candidates = Dir.glob( "#{path}/**/{*,.*}.txt" )
|
40
|
-
## pp candidates
|
41
|
-
candidates.each do |candidate|
|
42
|
-
datafiles << candidate if pattern.match( candidate )
|
43
|
-
end
|
44
|
-
|
45
|
-
## pp datafiles
|
46
|
-
datafiles
|
47
|
-
end
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
20
|
|
53
21
|
|
54
22
|
args = ARGV
|
@@ -86,25 +54,6 @@ p args
|
|
86
54
|
|
87
55
|
|
88
56
|
|
89
|
-
def expand_args( args )
|
90
|
-
paths = []
|
91
|
-
|
92
|
-
args.each do |arg|
|
93
|
-
## check if directory
|
94
|
-
if Dir.exist?( arg )
|
95
|
-
datafiles = find( arg )
|
96
|
-
puts
|
97
|
-
puts " found #{datafiles.size} match txt datafiles in #{arg}"
|
98
|
-
pp datafiles
|
99
|
-
paths += datafiles
|
100
|
-
else
|
101
|
-
## assume it's a file
|
102
|
-
paths << arg
|
103
|
-
end
|
104
|
-
end
|
105
|
-
|
106
|
-
paths
|
107
|
-
end
|
108
57
|
|
109
58
|
|
110
59
|
paths = if args.empty?
|
@@ -116,7 +65,7 @@ paths = if args.empty?
|
|
116
65
|
## check for directories
|
117
66
|
## and auto-expand
|
118
67
|
|
119
|
-
expand_args( args )
|
68
|
+
SportDb::Parser::Opts.expand_args( args )
|
120
69
|
end
|
121
70
|
|
122
71
|
|
@@ -125,17 +74,19 @@ SportDb::Parser::Linter.debug = true if opts[:debug]
|
|
125
74
|
|
126
75
|
linter = SportDb::Parser::Linter.new
|
127
76
|
|
128
|
-
|
77
|
+
errors = []
|
129
78
|
|
130
79
|
paths.each_with_index do |path,i|
|
131
80
|
puts "==> [#{i+1}/#{paths.size}] reading >#{path}<..."
|
132
81
|
linter.read( path, parse: !opts[:metal] )
|
82
|
+
|
83
|
+
errors += linter.errors if linter.errors?
|
133
84
|
end
|
134
85
|
|
135
|
-
if
|
86
|
+
if errors.size > 0
|
136
87
|
puts
|
137
|
-
pp
|
138
|
-
puts "!! #{
|
88
|
+
pp errors
|
89
|
+
puts "!! #{errors.size} parse error(s) in #{paths.size} datafiles(s)"
|
139
90
|
else
|
140
91
|
puts "OK no parse errors found in #{paths.size} datafile(s)"
|
141
92
|
end
|
data/lib/sportdb/parser/linter.rb
CHANGED
@@ -10,12 +10,6 @@ def self.debug=(value) @@debug = value; end
|
|
10
10
|
def self.debug?() @@debug ||= false; end ## note: default is FALSE
|
11
11
|
def debug?() self.class.debug?; end
|
12
12
|
|
13
|
-
## keep typed - why? why not?
|
14
|
-
## - used anywhere?
|
15
|
-
def self.typed=(value) @@typed = value; end
|
16
|
-
def self.typed?() @@typed ||= true; end ## note: default is TRUE
|
17
|
-
def typed?() self.class.typed?; end
|
18
|
-
|
19
13
|
|
20
14
|
|
21
15
|
attr_reader :errors
|
@@ -58,6 +52,9 @@ def errors?() @errors.size > 0; end
|
|
58
52
|
## parse - false (default) - tokenize (only)
|
59
53
|
## - true - tokenize & parse
|
60
54
|
def read( path, parse: false )
|
55
|
+
## note: every (new) read call - resets errors list to empty
|
56
|
+
@errors = []
|
57
|
+
|
61
58
|
nodes = OutlineReader.read( path )
|
62
59
|
|
63
60
|
## process nodes
|
@@ -94,9 +91,8 @@ def read( path, parse: false )
|
|
94
91
|
|
95
92
|
|
96
93
|
## skip new (experimental) attrib syntax
|
97
|
-
m = nil
|
98
94
|
if attrib_found == false &&
|
99
|
-
|
95
|
+
ATTRIB_RE.match?( line )
|
100
96
|
## note: check attrib regex AFTER group def e.g.:
|
101
97
|
## Group A:
|
102
98
|
## Group B: etc.
|
data/lib/sportdb/parser/opts.rb
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
|
2
|
+
module SportDb
|
3
|
+
class Parser
|
4
|
+
|
5
|
+
###
|
6
|
+
## note - Opts Helpers for now nested inside Parser - keep here? why? why not?
|
7
|
+
class Opts
|
8
|
+
|
9
|
+
SEASON_RE = %r{ (?:
|
10
|
+
\d{4}-\d{2}
|
11
|
+
| \d{4}(--[a-z0-9_-]+)?
|
12
|
+
)
|
13
|
+
}x
|
14
|
+
SEASON = SEASON_RE.source ## "inline" helper for embedding in other regexes - keep? why? why not?
|
15
|
+
|
16
|
+
|
17
|
+
## note: if pattern includes directory add here
|
18
|
+
## (otherwise move to more "generic" datafile) - why? why not?
|
19
|
+
MATCH_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
20
|
+
#{SEASON}
|
21
|
+
/[a-z0-9_-]+\.txt$ ## txt e.g /1-premierleague.txt
|
22
|
+
}x
|
23
|
+
|
24
|
+
|
25
|
+
def self.find( path )
|
26
|
+
datafiles = []
|
27
|
+
|
28
|
+
## note: normalize path - use File.expand_path ??
|
29
|
+
## change all backslash to slash for now
|
30
|
+
## path = path.gsub( "\\", '/' )
|
31
|
+
path = File.expand_path( path )
|
32
|
+
|
33
|
+
## check all txt files
|
34
|
+
## note: incl. files starting with dot (.) as candidates
|
35
|
+
## (normally excluded with just *)
|
36
|
+
candidates = Dir.glob( "#{path}/**/{*,.*}.txt" )
|
37
|
+
## pp candidates
|
38
|
+
candidates.each do |candidate|
|
39
|
+
datafiles << candidate if MATCH_RE.match( candidate )
|
40
|
+
end
|
41
|
+
|
42
|
+
## pp datafiles
|
43
|
+
datafiles
|
44
|
+
end
|
45
|
+
|
46
|
+
|
47
|
+
def self.expand_args( args )
|
48
|
+
paths = []
|
49
|
+
|
50
|
+
args.each do |arg|
|
51
|
+
## check if directory
|
52
|
+
if Dir.exist?( arg )
|
53
|
+
datafiles = find( arg )
|
54
|
+
puts
|
55
|
+
puts " found #{datafiles.size} match txt datafiles in #{arg}"
|
56
|
+
pp datafiles
|
57
|
+
paths += datafiles
|
58
|
+
else
|
59
|
+
## assume it's a file
|
60
|
+
paths << arg
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
paths
|
65
|
+
end
|
66
|
+
end # class Opts
|
67
|
+
|
68
|
+
|
69
|
+
end # class Parser
|
70
|
+
end # module SportDb
|
data/lib/sportdb/parser/outline_reader.rb
CHANGED
@@ -73,7 +73,7 @@ class OutlineReader
|
|
73
73
|
start_para = true
|
74
74
|
|
75
75
|
heading_marker = m[:marker]
|
76
|
-
heading_level =
|
76
|
+
heading_level = heading_marker.length ## count number of = for heading level
|
77
77
|
heading = m[:text].strip
|
78
78
|
|
79
79
|
puts "heading #{heading_level} >#{heading}<" if debug?
|
data/lib/sportdb/parser/token-date.rb
CHANGED
@@ -37,22 +37,24 @@ def self.build_names( lines )
|
|
37
37
|
end
|
38
38
|
|
39
39
|
|
40
|
+
def self.build_map( lines, downcase: false )
|
41
|
+
## note: name gets downcased only if downcase: true is passed in (default is false)!!!
|
42
|
+
## build a lookup map that maps the word to the index (line no) plus 1 e.g.
|
43
|
+
## {"january" => 1, "jan" => 1,
|
44
|
+
## "february" => 2, "feb" => 2,
|
45
|
+
## "march" => 3, "mar" => 3,
|
46
|
+
## "april" => 4, "apr" => 4,
|
47
|
+
## "may" => 5,
|
48
|
+
## "june" => 6, "jun" => 6, ...
|
49
|
+
lines.each_with_index.reduce( {} ) do |h,(line,i)|
|
50
|
+
line.each do |name|
|
51
|
+
h[ downcase ? name.downcase : name ] = i+1
|
52
|
+
end ## note: start mapping with 1 (and NOT zero-based, that is, 0)
|
53
|
+
h
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
40
57
|
|
41
|
-
## add normalize option (for downcase) - why? why not?
|
42
|
-
def self.build_map( lines )
|
43
|
-
## note: downcase name!!!
|
44
|
-
## build a lookup map that maps the word to the index (line no) plus 1 e.g.
|
45
|
-
## {"january" => 1, "jan" => 1,
|
46
|
-
## "february" => 2, "feb" => 2,
|
47
|
-
## "march" => 3, "mar" => 3,
|
48
|
-
## "april" => 4, "apr" => 4,
|
49
|
-
## "may" => 5,
|
50
|
-
## "june" => 6, "jun" => 6, ...
|
51
|
-
lines.each_with_index.reduce( {} ) do |h,(line,i)|
|
52
|
-
line.each { |name| h[ name.downcase ] = i+1 } ## note: start mapping with 1 (and NOT zero-based, that is, 0)
|
53
|
-
h
|
54
|
-
end
|
55
|
-
end
|
56
58
|
|
57
59
|
|
58
60
|
MONTH_LINES = parse_names( <<TXT )
|
@@ -72,7 +74,7 @@ TXT
|
|
72
74
|
|
73
75
|
MONTH_NAMES = build_names( MONTH_LINES )
|
74
76
|
# pp MONTH_NAMES
|
75
|
-
MONTH_MAP = build_map( MONTH_LINES )
|
77
|
+
MONTH_MAP = build_map( MONTH_LINES, downcase: true )
|
76
78
|
# pp MONTH_MAP
|
77
79
|
|
78
80
|
|
@@ -89,7 +91,7 @@ TXT
|
|
89
91
|
|
90
92
|
DAY_NAMES = build_names( DAY_LINES )
|
91
93
|
# pp DAY_NAMES
|
92
|
-
DAY_MAP = build_map( DAY_LINES )
|
94
|
+
DAY_MAP = build_map( DAY_LINES, downcase: true )
|
93
95
|
# pp DAY_MAP
|
94
96
|
|
95
97
|
|
data/lib/sportdb/parser/version.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
|
2
|
+
module SportDb
|
3
|
+
module Module
|
4
|
+
module Parser
|
5
|
+
MAJOR = 0 ## todo: namespace inside version or something - why? why not??
|
6
|
+
MINOR = 1
|
7
|
+
PATCH = 0
|
8
|
+
VERSION = [MAJOR,MINOR,PATCH].join('.')
|
9
|
+
|
10
|
+
def self.version
|
11
|
+
VERSION
|
12
|
+
end
|
13
|
+
|
14
|
+
def self.banner
|
15
|
+
"sportdb-parser/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}] in (#{root})"
|
16
|
+
end
|
17
|
+
|
18
|
+
def self.root
|
19
|
+
File.expand_path( File.dirname(File.dirname(File.dirname(File.dirname(__FILE__)))) )
|
20
|
+
end
|
21
|
+
|
22
|
+
end # module Parser
|
23
|
+
end
|
24
|
+
end
|
data/lib/sportdb/parser.rb
CHANGED
@@ -1,3 +1,7 @@
|
|
1
|
+
## pulls in (external) gem dependencies
|
2
|
+
require 'cocos'
|
3
|
+
require 'season/formats' # e.g. Season() support machinery
|
4
|
+
|
1
5
|
|
2
6
|
|
3
7
|
####
|
@@ -11,7 +15,7 @@
|
|
11
15
|
## text - change text to name - why? why not?
|
12
16
|
|
13
17
|
|
14
|
-
|
18
|
+
require_relative 'parser/version'
|
15
19
|
require_relative 'parser/token-score'
|
16
20
|
require_relative 'parser/token-date'
|
17
21
|
require_relative 'parser/token-text'
|
@@ -23,6 +27,7 @@ require_relative 'parser/parser'
|
|
23
27
|
## more
|
24
28
|
require_relative 'parser/outline_reader'
|
25
29
|
require_relative 'parser/linter'
|
30
|
+
require_relative 'parser/opts'
|
26
31
|
|
27
32
|
|
28
33
|
###
|
@@ -39,6 +44,5 @@ end # module SportDb
|
|
39
44
|
=end
|
40
45
|
|
41
46
|
|
42
|
-
|
43
|
-
|
47
|
+
puts SportDb::Module::Parser.banner # say hello
|
44
48
|
|
metadata
CHANGED
@@ -1,15 +1,43 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sportdb-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-07-
|
11
|
+
date: 2024-07-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: cocos
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: season-formats
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
13
41
|
- !ruby/object:Gem::Dependency
|
14
42
|
name: rdoc
|
15
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -62,12 +90,14 @@ files:
|
|
62
90
|
- lib/sportdb/parser.rb
|
63
91
|
- lib/sportdb/parser/lang.rb
|
64
92
|
- lib/sportdb/parser/linter.rb
|
93
|
+
- lib/sportdb/parser/opts.rb
|
65
94
|
- lib/sportdb/parser/outline_reader.rb
|
66
95
|
- lib/sportdb/parser/parser.rb
|
67
96
|
- lib/sportdb/parser/token-date.rb
|
68
97
|
- lib/sportdb/parser/token-score.rb
|
69
98
|
- lib/sportdb/parser/token-text.rb
|
70
99
|
- lib/sportdb/parser/token.rb
|
100
|
+
- lib/sportdb/parser/version.rb
|
71
101
|
homepage: https://github.com/sportdb/sport.db
|
72
102
|
licenses:
|
73
103
|
- Public Domain
|