fbtok 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 434d60833636dd403ce4671279c847aa446295236685f264fa31b4791d9cdf82
4
+ data.tar.gz: 55729c6c2100163e05af42f53121958f7b9442ee15e07af2bfc28a7a94cdb75f
5
+ SHA512:
6
+ metadata.gz: e0694f22d437e9d614070acf083db4c4b522d99fe20c0c53370c6dd59d73107cc9ab7f94b26a049c4f3031f2acbfef2ff64ad0eb447dd6c2a54c34293becc717
7
+ data.tar.gz: fbe918cfe469447bcc1b77e9bb12a17e6430e6dc1bf5e287637cc0cb8379f606582c9adbfb1e3150609887db26fc7f75506f7faa64ae992f4e7a5487b7d0c16e
data/CHANGELOG.md ADDED
@@ -0,0 +1,3 @@
1
+ ### 0.0.1 / 2025-01-02
2
+
3
+ * Everything is new. First release.
data/Manifest.txt ADDED
@@ -0,0 +1,9 @@
1
+ CHANGELOG.md
2
+ Manifest.txt
3
+ README.md
4
+ Rakefile
5
+ bin/fbtok
6
+ lib/fbtok.rb
7
+ lib/fbtok/fbtok.rb
8
+ lib/fbtok/linter.rb
9
+ lib/fbtok/opts.rb
data/README.md ADDED
@@ -0,0 +1,29 @@
1
+ # fbtok - football.txt lint tools incl. tokenizer, parser & more
2
+
3
+
4
+
5
+ * home :: [github.com/sportdb/footty](https://github.com/sportdb/footty)
6
+ * bugs :: [github.com/sportdb/footty/issues](https://github.com/sportdb/footty/issues)
7
+ * gem :: [rubygems.org/gems/fbtok](https://rubygems.org/gems/fbtok)
8
+ * rdoc :: [rubydoc.info/gems/fbtok](http://rubydoc.info/gems/fbtok)
9
+
10
+
11
+ ## Step 0 - Installation Via Gems
12
+
13
+ To install the command-line tool via gems (ruby's package manager) use:
14
+
15
+ ```
16
+ $ gem install fbtok
17
+ ```
18
+
19
+
20
+ ## Usage
21
+
22
+ ...
23
+
24
+
25
+
26
+ ## Questions? Comments?
27
+
28
+ Yes, you can. More than welcome.
29
+ See [Help & Support »](https://github.com/openfootball/help)
data/Rakefile ADDED
@@ -0,0 +1,28 @@
1
+ require 'hoe' ## gem packaging helper; provides Hoe.spec used below
2
+
3
+
4
+ Hoe.spec 'fbtok' do
5
+ self.version = '0.0.1'
6
+
7
+ self.summary = "fbtok - football.txt lint tools incl. tokenizer, parser & more"
8
+ self.description = summary ## description reuses the summary text
9
+
10
+ self.urls = { home: 'https://github.com/sportdb/footty' }
11
+
12
+ self.author = 'Gerald Bauer'
13
+ self.email = 'gerald.bauer@gmail.com'
14
+
15
+ # use markdown (.md) readme & changelog files for github formatting
16
+ self.readme_file = 'README.md'
17
+ self.history_file = 'CHANGELOG.md'
18
+
19
+ self.licenses = ['Public Domain']
20
+
21
+ self.extra_deps = [ ## runtime dependencies (gem name, version requirement)
22
+ ['sportdb-parser', '>= 0.3.9'],
23
+ ]
24
+
25
+ self.spec_extras = { ## extra gem specification settings
26
+ required_ruby_version: '>= 3.1.0'
27
+ }
28
+ end
data/bin/fbtok ADDED
@@ -0,0 +1,13 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ ## tip: to test run this script from the gem's root folder (adds ./lib to the load path) use:
4
+ ## ruby -I ./lib bin/fbtok
5
+
6
+ require 'fbtok'
7
+
8
+
9
+ Fbtok.main( ARGV ) ## hand the command-line arguments over to the lint tool
10
+
11
+
12
+ puts "bye" ## printed once main returns
13
+
@@ -0,0 +1,141 @@
1
+
2
module Fbtok

  ##
  # Command-line entry point of the fbtok lint tool.
  #
  # Parses the options found in +args+ (recognized options get removed
  # from the array in place), collects the datafiles to check, runs the
  # linter on every datafile and prints the parse errors found.
  #
  # The datafiles come from one of:
  # - the pathspecs listed in a .csv file (-f/--file); every record must
  #   have a 'path' entry that gets resolved relative to the .csv file, or
  # - the remaining (path) arguments; directories get auto-expanded, or
  # - two hard-coded (developer) sample datafiles if no argument is left.
  #
  # With -f/--file a summary report covering all datasets gets printed
  # at the end.
  #
  # args - Array of command-line argument strings (default: ARGV)
  #
  # The return value is not meaningful.
  def self.main( args=ARGV )
    opts = parse_opts( args )

    puts "OPTS:"
    p opts
    puts "ARGV:"
    p args


    ## todo/check - use packs or projects or such
    ##                instead of specs - why? why not?
    ## note: every spec is a pair of [paths, rec]
    specs = []
    if opts[:file]
      ## note(review): read_csv is not defined in this file -
      ##   presumably provided by a required dependency
      recs = read_csv( opts[:file] )
      pp recs
      ## note - make pathspecs relative to passed in file arg!!!
      basedir = File.dirname( opts[:file] )
      recs.each do |rec|
        paths = SportDb::Parser::Opts.find( rec['path'], dir: basedir )
        specs << [paths, rec]
      end
    else
      paths = if args.empty?
                ## no args passed in - fall back to (developer) sample datafiles
                [
                  '../../../openfootball/euro/2021--europe/euro.txt',
                  '../../../openfootball/euro/2024--germany/euro.txt',
                ]
              else
                ## check for directories
                ##   and auto-expand
                SportDb::Parser::Opts.expand_args( args )
              end
      specs << [paths, {}]
    end


    SportDb::Parser::Linter.debug = true   if opts[:debug]

    linter = SportDb::Parser::Linter.new


    specs.each do |paths, rec|
      errors = []

      paths.each_with_index do |path,j|
        puts "==> [#{j+1}/#{paths.size}] reading >#{path}<..."
        linter.read( path, parse: !opts[:metal] )

        ## note: the linter resets its errors on every read;
        ##         collect them per dataset here
        errors.concat( linter.errors )   if linter.errors?
      end

      if errors.size > 0
        puts
        pp errors
        puts
        puts "!! #{errors.size} parse error(s) in #{paths.size} datafile(s)"
      else
        puts
        puts "OK no parse errors found in #{paths.size} datafile(s)"
      end

      ## add errors to rec via rec['errors'] to allow
      ##   for further processing/reporting
      rec['errors'] = errors
    end


    ###
    ## generate a report if --file option used
    if opts[:file]
      puts
      puts "SUMMARY:"
      puts summary_report( specs )

      # maybe write out in the future?
      # basedir = File.dirname( opts[:file] )
      # basename = File.basename( opts[:file], File.extname( opts[:file] ))
    end
  end # method self.main


  ##
  # Parses (and removes) the command-line options in +args+.
  #
  # Returns the options Hash with the keys :debug, :metal and :file.
  def self.parse_opts( args )
    opts = {
      debug: true,
      metal: false,
      file:  nil,
    }

    parser = OptionParser.new do |o|
      o.banner = "Usage: #{$PROGRAM_NAME} [options] PATH"

      o.on( "-q", "--quiet",
            "less debug output/messages - default is (#{!opts[:debug]})" ) do
        opts[:debug] = false
      end
      o.on( "--verbose", "--debug",
            "turn on verbose / debug output (default: #{opts[:debug]})" ) do
        opts[:debug] = true
      end

      o.on( "--metal",
            "turn off typed parse tree; show to the metal tokens"+
            " (default: #{opts[:metal]})" ) do
        opts[:metal] = true
      end

      o.on( "-f FILE", "--file FILE",
            "read datafiles (pathspecs) via .csv file") do |file|
        opts[:file] = file
        ## note: for batch (massive) processing auto-set debug (verbose output) to false (as default)
        opts[:debug] = false
      end
    end
    parser.parse!( args )

    opts
  end


  ##
  # Builds the summary report text for all datasets; expects every
  # rec to carry the collected errors in rec['errors'].
  #
  # Returns the report as a String.
  def self.summary_report( specs )
    buf = String.new

    buf << "# fbtok summary report - #{specs.size} dataset(s)\n\n"

    specs.each do |paths, rec|
      errors = rec['errors']

      if errors.size > 0
        buf << "!! #{errors.size} ERROR(S) "
      else
        buf << " OK "
      end
      buf << "%-20s" % rec['path']
      buf << " - #{paths.size} datafile(s)"
      buf << "\n"

      if errors.size > 0
        buf << errors.pretty_inspect
        buf << "\n"
      end
    end

    buf
  end

  private_class_method :parse_opts, :summary_report
end # module Fbtok
@@ -0,0 +1,156 @@
1
+
2
module SportDb
class Parser

###
## note - Linter for now nested inside Parser - keep? why? why not?
##
## Reads a datafile (via OutlineReader) and runs every text line
##  through the parser (or the tokenizer only);
##  all error messages get collected in errors.
class Linter

  ## debug flag - shared by all linters
  def self.debug=(value) @@debug = value; end
  def self.debug?()      @@debug ||= false; end    ## note: default is FALSE
  def debug?()  self.class.debug?; end



  ## list of errors of the last read call;
  ##   every error is a triplet tuple (file / msg / line text)
  attr_reader :errors

  def initialize
    @errors = []
    @parser = Parser.new   ## use own parser instance (not shared) - why? why not?
  end


  def errors?() @errors.size > 0; end



  ## note: colon (:) MUST be followed by one (or more) spaces
  ##      make sure mon feb 12 18:10 will not match
  ##        allow 1. FC Köln etc.
  ##               Mainz 05:
  ##           limit to 30 chars max
  ##          only allow chars incl. intl but (NOT ()[]/;)
  ##
  ##   Group A:
  ##   Group B:   - remove colon
  ##    or lookup first

  ATTRIB_RE = %r{^
                   [ ]*?     # slurp leading spaces
                (?<key>[^:|\]\[()\/; -]
                       [^:|\]\[()\/;]{0,30}
                 )
                   [ ]*?     # slurp trailing spaces
                   :[ ]+
                (?<value>.+)
                    [ ]*?   # slurp trailing spaces
                   $
                }ix


  #########
  ## reads the datafile in path and checks all text lines
  ##
  ##  parse - false (default) - tokenize (only)
  ##        - true            - tokenize & parse
  ##
  ##  note: the errors found are available via errors
  ##          (the return value is NOT meaningful)
  ##
  ##  raises ArgumentError on an unsupported node type
  def read( path, parse: false )
    ## note: every (new) read call - resets errors list to empty
    @errors = []

    ## note: every node is expected to be a pair of [type, data]
    ##         e.g. [:h1, text], [:h2, text] or [:p, lines]
    nodes = OutlineReader.read( path )

    ## process nodes
    h1      = nil
    orphans = 0    ## track paragraphs with no heading

    ## note: the attrib flag is kept across paragraphs (NOT reset per node)
    attrib_found = false


    nodes.each do |node|
      type = node[0]

      case type
      when :h1
        h1 = node[1]  ## get heading text
        puts " = Heading 1 >#{node[1]}<"
      when :h2
        if h1.nil?
          puts "!! WARN - no heading for subheading; skipping parse"
          next
        end
        puts " == Heading 2 >#{node[1]}<"
      when :p
        if h1.nil?
          ## note: warns for every orphan paragraph (with the running count)
          orphans += 1
          puts "!! WARN - no heading for #{orphans} text paragraph(s); skipping parse"
          next
        end

        node[1].each do |line|
          if debug?
            puts
            puts "line >#{line}<"
          end

          ## skip new (experimental attrib syntax)
          if !attrib_found && ATTRIB_RE.match?( line )
            ## note: check attrib regex AFTER group def e.g.:
            ##         Group A:
            ##         Group B:  etc.
            ##     todo/fix - change Group A: to Group A etc.
            ##                       Group B: to Group B
            attrib_found = true
            next
          end

          if attrib_found
            ## check if line ends with dot
            ##  if not slurp up lines to the next dot!!!
            ## note: the dot check starts with the line AFTER the key/value
            ##        line; that next line always gets skipped
            attrib_found = false   if line.end_with?( '.' )
            next
          end

          t, error_messages = if parse
                                @parser.parse_with_errors( line )
                              else
                                @parser.tokenize_with_errors( line )
                              end

          ## add to "global" error list
          ##   make a triplet tuple (file / msg / line text)
          error_messages.each do |msg|
            @errors << [ path, msg, line ]
          end

          pp t   if debug?
        end
      else
        pp node
        raise ArgumentError, "unsupported (node) type >#{type}<"
      end
    end  # each node
  end  # read
end  # class Linter


end   # class Parser
end   # module SportDb
data/lib/fbtok/opts.rb ADDED
@@ -0,0 +1,81 @@
1
+
2
module SportDb
class Parser


###
## note - Opts Helpers for now nested inside Parser - keep here? why? why not?
##
## Helpers to find (match) datafiles in directories and
##   to expand command-line (path) arguments.
class Opts

  ## season directory name e.g. 2024-25 or 2024 or 2021--europe
  SEASON_RE = %r{ (?:
                       \d{4}-\d{2}
                     | \d{4}(--[a-z0-9_-]+)?
                  )
                }x
  SEASON = SEASON_RE.source    ## "inline" helper for embedding in other regexes - keep? why? why not?


  ## note: if pattern includes directory add here
  ##     (otherwise move to more "generic" datafile) - why? why not?
  ##   update - note include/allow dot (.) too
  ##               e.g. 2024-25/at.1.txt
  ##                        change to at_1 or uefa_cl or such - why? why not?
  MATCH_RE = %r{ (?: ^|/ )      # beginning (^) or beginning of path (/)
                     #{SEASON}
                   /[a-z0-9_.-]+\.txt$  ## txt e.g /1-premierleague.txt
               }x


  ## finds all (match) datafiles below the directory in path
  ##   that is, all .txt files in a season directory (see MATCH_RE)
  ##
  ##   path - directory to search (recursively)
  ##   dir  - optional base directory; a relative path gets resolved
  ##            against dir (instead of the current working directory)
  ##
  ##  returns an array of absolute datafile paths
  ##    (empty if nothing matches or the directory does not exist)
  def self.find( path, dir: nil )
    ## check - rename dir
    ##          use root_dir or work_dir or cd or such - why? why not?

    ## note: normalize path - use File.expand_path ??
    ##    change all backslash to slash for now
    ## path = path.gsub( "\\", '/' )
    root = if dir
             File.expand_path( path, File.expand_path( dir ))
           else
             File.expand_path( path )
           end

    ## check all txt files
    ## note: incl. files starting with dot (.)) as candidates
    ##     (normally excluded with just *)
    ## note: pass the directory via base (and NOT as part of the pattern);
    ##         otherwise glob chars such as [] or {} in the directory name
    ##         get read as a pattern and no datafile will be found
    candidates = Dir.glob( '**/{*,.*}.txt', base: root )
                    .map { |relpath| File.join( root, relpath ) }
    ## pp candidates

    candidates.select { |candidate| MATCH_RE.match?( candidate ) }
  end


  ## expands all directories in args to the (match) datafiles found
  ##   and passes on all other args as is (assumed to be files)
  ##
  ##  returns an array of paths
  def self.expand_args( args )
    args.flat_map do |arg|
      ## check if directory
      if Dir.exist?( arg )
        datafiles = find( arg )
        puts
        puts " found #{datafiles.size} match txt datafiles in #{arg}"
        pp datafiles
        datafiles
      else
        ## assume it's a file
        [arg]
      end
    end
  end
end  # class Opts


end   # class Parser
end   # module SportDb
data/lib/fbtok.rb ADDED
@@ -0,0 +1,9 @@
1
+
2
+ require 'sportdb/parser'
3
+
4
+
5
+ ## our own code
6
+ require_relative 'fbtok/opts'
7
+ require_relative 'fbtok/fbtok'
8
+
9
+
metadata ADDED
@@ -0,0 +1,105 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fbtok
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Gerald Bauer
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2025-01-02 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: sportdb-parser
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 0.3.9
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 0.3.9
27
+ - !ruby/object:Gem::Dependency
28
+ name: rdoc
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '4.0'
34
+ - - "<"
35
+ - !ruby/object:Gem::Version
36
+ version: '7'
37
+ type: :development
38
+ prerelease: false
39
+ version_requirements: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: '4.0'
44
+ - - "<"
45
+ - !ruby/object:Gem::Version
46
+ version: '7'
47
+ - !ruby/object:Gem::Dependency
48
+ name: hoe
49
+ requirement: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - "~>"
52
+ - !ruby/object:Gem::Version
53
+ version: '4.2'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - "~>"
59
+ - !ruby/object:Gem::Version
60
+ version: '4.2'
61
+ description: fbtok - football.txt lint tools incl. tokenizer, parser & more
62
+ email: gerald.bauer@gmail.com
63
+ executables:
64
+ - fbtok
65
+ extensions: []
66
+ extra_rdoc_files:
67
+ - CHANGELOG.md
68
+ - Manifest.txt
69
+ - README.md
70
+ files:
71
+ - CHANGELOG.md
72
+ - Manifest.txt
73
+ - README.md
74
+ - Rakefile
75
+ - bin/fbtok
76
+ - lib/fbtok.rb
77
+ - lib/fbtok/fbtok.rb
78
+ - lib/fbtok/linter.rb
79
+ - lib/fbtok/opts.rb
80
+ homepage: https://github.com/sportdb/footty
81
+ licenses:
82
+ - Public Domain
83
+ metadata: {}
84
+ post_install_message:
85
+ rdoc_options:
86
+ - "--main"
87
+ - README.md
88
+ require_paths:
89
+ - lib
90
+ required_ruby_version: !ruby/object:Gem::Requirement
91
+ requirements:
92
+ - - ">="
93
+ - !ruby/object:Gem::Version
94
+ version: 3.1.0
95
+ required_rubygems_version: !ruby/object:Gem::Requirement
96
+ requirements:
97
+ - - ">="
98
+ - !ruby/object:Gem::Version
99
+ version: '0'
100
+ requirements: []
101
+ rubygems_version: 3.5.22
102
+ signing_key:
103
+ specification_version: 4
104
+ summary: fbtok - football.txt lint tools incl. tokenizer, parser & more
105
+ test_files: []