beerdb 0.9.1 → 0.9.2
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/beerdb/models/beer.rb +2 -2
- data/lib/beerdb/reader.rb +48 -0
- data/lib/beerdb/version.rb +1 -1
- data/lib/beerdb.rb +153 -0
- metadata +12 -12
data/lib/beerdb/models/beer.rb
CHANGED
@@ -86,10 +86,10 @@ class Beer < ActiveRecord::Base
|
|
86
86
|
## check for grades (e.g. ***/**/*) in titles (will add attribs[:grade] to hash)
|
87
87
|
## if grade missing; set default to 4; lets us update overwrite 1,2,3 values on update
|
88
88
|
attribs[ :grade ] ||= 4
|
89
|
-
|
89
|
+
|
90
90
|
### check for "default" tags - that is, if present attribs[:tags] remove from hash
|
91
91
|
value_tag_keys += find_tags_in_attribs!( attribs )
|
92
|
-
|
92
|
+
|
93
93
|
## check for optional values
|
94
94
|
values.each_with_index do |value,index|
|
95
95
|
if match_country(value) do |country|
|
data/lib/beerdb/reader.rb
CHANGED
@@ -13,6 +13,9 @@ module Matcher
|
|
13
13
|
match_xxx_for_country_n_region( name, 'beers', &blk )
|
14
14
|
end
|
15
15
|
|
16
|
+
## todo: add match_brewpubs ???
|
17
|
+
## - autoadd brewpub flag!! use more_attribs??
|
18
|
+
|
16
19
|
def match_breweries_for_country( name, &blk )
|
17
20
|
match_xxx_for_country( name, 'breweries', &blk )
|
18
21
|
end
|
@@ -142,10 +145,45 @@ class Reader
|
|
142
145
|
load_beers_worker( name, more_attribs )
|
143
146
|
end
|
144
147
|
|
148
|
+
|
145
149
|
def load_beers_worker( name, more_attribs={} )
|
146
150
|
reader = ValuesReaderV2.new( name, include_path, more_attribs )
|
147
151
|
|
152
|
+
### todo: cleanup - check if [] works for build_title...
|
153
|
+
# better cleaner way ???
|
154
|
+
if more_attribs[:region_id].present?
|
155
|
+
known_breweries_source = Brewery.where( region_id: more_attribs[:region_id] )
|
156
|
+
elsif more_attribs[:country_id].present?
|
157
|
+
known_breweries_source = Brewery.where( country_id: more_attribs[:country_id] )
|
158
|
+
else
|
159
|
+
logger.warn "no region or country specified; use empty brewery ary for header mapper"
|
160
|
+
known_breweries_source = []
|
161
|
+
end
|
162
|
+
|
163
|
+
known_breweries = TextUtils.build_title_table_for( known_breweries_source )
|
164
|
+
|
165
|
+
|
148
166
|
reader.each_line do |new_attributes, values|
|
167
|
+
|
168
|
+
## note: check for header attrib; if present remove
|
169
|
+
### todo: cleanup code later
|
170
|
+
## fix: add to new_attributes hash instead of values ary
|
171
|
+
## - fix: match_brewery() move region,city code out of values loop for reuse at the end
|
172
|
+
if new_attributes[:header].present?
|
173
|
+
brewery_line = new_attributes[:header].dup # note: make sure we make a copy; will use in-place string ops
|
174
|
+
new_attributes.delete(:header) ## note: do NOT forget to remove from hash!
|
175
|
+
|
176
|
+
logger.debug " trying to find brewery in line >#{brewery_line}<"
|
177
|
+
## todo: check what map_titles_for! returns (nothing ???)
|
178
|
+
TextUtils.map_titles_for!( 'brewery', brewery_line, known_breweries )
|
179
|
+
brewery_key = TextUtils.find_key_for!( 'brewery', brewery_line )
|
180
|
+
logger.debug " brewery_key = >#{brewery_key}<"
|
181
|
+
unless brewery_key.nil?
|
182
|
+
## bingo! add brewery_id upfront, that is, as first value in ary
|
183
|
+
values = values.unshift "by:#{brewery_key}"
|
184
|
+
end
|
185
|
+
end
|
186
|
+
|
149
187
|
Beer.create_or_update_from_attribs( new_attributes, values )
|
150
188
|
end # each_line
|
151
189
|
end
|
@@ -187,6 +225,16 @@ class Reader
|
|
187
225
|
reader = ValuesReaderV2.new( name, include_path, more_attribs )
|
188
226
|
|
189
227
|
reader.each_line do |new_attributes, values|
|
228
|
+
|
229
|
+
#######
|
230
|
+
# fix: move to (inside)
|
231
|
+
# Brewery.create_or_update_from_attribs ||||
|
232
|
+
## note: group header not used for now; do NOT forget to remove from hash!
|
233
|
+
if new_attributes[:header].present?
|
234
|
+
logger.warn "removing unused group header #{new_attributes[:header]}"
|
235
|
+
new_attributes.delete(:header) ## note: do NOT forget to remove from hash!
|
236
|
+
end
|
237
|
+
|
190
238
|
Brewery.create_or_update_from_attribs( new_attributes, values )
|
191
239
|
end # each_line
|
192
240
|
end
|
data/lib/beerdb/version.rb
CHANGED
data/lib/beerdb.rb
CHANGED
@@ -20,6 +20,159 @@ require 'logutils'
|
|
20
20
|
require 'textutils'
|
21
21
|
require 'worlddb'
|
22
22
|
|
23
|
+
######################
|
24
|
+
# fix begin:
|
25
|
+
|
26
|
+
|
27
|
+
################
|
28
|
+
# todo: move module to textutils!!!
|
29
|
+
|
30
|
+
### fix: move to textutils??
|
31
|
+
|
32
|
+
|
33
|
+
## todo: rename to TitleHelpers? TitleMatcher? TitleMapper? TitleMapping? TitleMappings? TitleFinder? TitleHelpers?
|
34
|
+
# or rename to KeyMapping?, KeyMapper?, KeyTable? etc.
|
35
|
+
|
36
|
+
|
37
|
+
module TextUtils::TitleTable
|
38
|
+
|
39
|
+
|
40
|
+
def build_title_table_for( records )
|
41
|
+
## build known titles table w/ synonyms e.g.
|
42
|
+
#
|
43
|
+
# [[ 'wolfsburg', [ 'VfL Wolfsburg' ]],
|
44
|
+
# [ 'augsburg', [ 'FC Augsburg', 'Augi2', 'Augi3' ]],
|
45
|
+
# [ 'stuttgart', [ 'VfB Stuttgart' ]] ]
|
46
|
+
|
47
|
+
known_titles = []
|
48
|
+
|
49
|
+
records.each_with_index do |rec,index|
|
50
|
+
|
51
|
+
title_candidates = []
|
52
|
+
title_candidates << rec.title
|
53
|
+
|
54
|
+
title_candidates += rec.synonyms.split('|') if rec.synonyms.present?
|
55
|
+
|
56
|
+
|
57
|
+
## check if title includes subtitle e.g. Grand Prix Japan (Suzuka Circuit)
|
58
|
+
# make subtitle optional by adding title w/o subtitle e.g. Grand Prix Japan
|
59
|
+
|
60
|
+
titles = []
|
61
|
+
title_candidates.each do |t|
|
62
|
+
titles << t
|
63
|
+
if t =~ /\(.+\)/
|
64
|
+
extra_title = t.gsub( /\(.+\)/, '' ) # remove/delete subtitles
|
65
|
+
extra_title.strip! # strip leading and trailing whitespace too!
|
66
|
+
titles << extra_title
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
|
71
|
+
## NB: sort here by length (largest goes first - best match)
|
72
|
+
# exclude code and key (key should always go last)
|
73
|
+
titles = titles.sort { |left,right| right.length <=> left.length }
|
74
|
+
|
75
|
+
## escape for regex plus allow subs for special chars/accents
|
76
|
+
titles = titles.map { |title| TextUtils.title_esc_regex( title ) }
|
77
|
+
|
78
|
+
## NB: only include code field - if defined
|
79
|
+
titles << rec.code if rec.respond_to?(:code) && rec.code.present?
|
80
|
+
|
81
|
+
known_titles << [ rec.key, titles ]
|
82
|
+
|
83
|
+
### fix: use plain logger
|
84
|
+
LogUtils::Logger.root.debug " #{rec.class.name}[#{index+1}] #{rec.key} >#{titles.join('|')}<"
|
85
|
+
end
|
86
|
+
|
87
|
+
known_titles
|
88
|
+
end
|
89
|
+
|
90
|
+
|
91
|
+
|
92
|
+
def find_key_for!( name, line )
|
93
|
+
regex = /@@oo([^@]+?)oo@@/ # e.g. everything in @@ .... @@ (use non-greedy +? plus all chars but not @, that is [^@])
|
94
|
+
|
95
|
+
upcase_name = name.upcase
|
96
|
+
downcase_name = name.downcase
|
97
|
+
|
98
|
+
if line =~ regex
|
99
|
+
value = "#{$1}"
|
100
|
+
### fix: use plain logger
|
101
|
+
LogUtils::Logger.root.debug " #{downcase_name}: >#{value}<"
|
102
|
+
|
103
|
+
line.sub!( regex, "[#{upcase_name}]" )
|
104
|
+
|
105
|
+
return $1
|
106
|
+
else
|
107
|
+
return nil
|
108
|
+
end
|
109
|
+
end
|
110
|
+
|
111
|
+
|
112
|
+
def find_keys_for!( name, line ) # NB: keys (plural!) - will return array
|
113
|
+
counter = 1
|
114
|
+
keys = []
|
115
|
+
|
116
|
+
downcase_name = name.downcase
|
117
|
+
|
118
|
+
key = find_key_for!( "#{downcase_name}#{counter}", line )
|
119
|
+
while key.present?
|
120
|
+
keys << key
|
121
|
+
counter += 1
|
122
|
+
key = find_key_for!( "#{downcase_name}#{counter}", line )
|
123
|
+
end
|
124
|
+
|
125
|
+
keys
|
126
|
+
end
|
127
|
+
|
128
|
+
|
129
|
+
def map_titles_for!( name, line, title_table )
|
130
|
+
title_table.each do |rec|
|
131
|
+
key = rec[0]
|
132
|
+
values = rec[1]
|
133
|
+
map_title_worker_for!( name, line, key, values )
|
134
|
+
end
|
135
|
+
end
|
136
|
+
|
137
|
+
|
138
|
+
def map_title_worker_for!( name, line, key, values )
|
139
|
+
|
140
|
+
downcase_name = name.downcase
|
141
|
+
|
142
|
+
values.each do |value|
|
143
|
+
## nb: \b does NOT include space or newline for word boundary (only alphanums e.g. a-z0-9)
|
144
|
+
## (thus add it, allows match for Benfica Lis. for example - note . at the end)
|
145
|
+
|
146
|
+
## check add $ e.g. (\b| |\t|$) does this work? - check w/ Benfica Lis.$
|
147
|
+
regex = /\b#{value}(\b| |\t|$)/ # wrap with word boundary (e.g. match only whole words e.g. not wac in wacker)
|
148
|
+
if line =~ regex
|
149
|
+
### fix: use plain logger
|
150
|
+
LogUtils::Logger.root.debug " match for #{downcase_name} >#{key}< >#{value}<"
|
151
|
+
# make sure @@oo{key}oo@@ doesn't match itself with other key e.g. wacker, wac, etc.
|
152
|
+
line.sub!( regex, "@@oo#{key}oo@@ " ) # NB: add one space char at end
|
153
|
+
return true # break out after first match (do NOT continue)
|
154
|
+
end
|
155
|
+
end
|
156
|
+
return false
|
157
|
+
end
|
158
|
+
|
159
|
+
|
160
|
+
|
161
|
+
end # module TextUtils::TitleTable
|
162
|
+
|
163
|
+
|
164
|
+
|
165
|
+
## auto-include methods
|
166
|
+
|
167
|
+
module TextUtils
|
168
|
+
# make helpers available as class methods e.g. TextUtils.convert_unicode_dashes_to_plain_ascii
|
169
|
+
extend TitleTable # lets us use TextUtils.build_title_table_for etc.
|
170
|
+
end
|
171
|
+
|
172
|
+
# quick fix end:
|
173
|
+
########################
|
174
|
+
|
175
|
+
|
23
176
|
|
24
177
|
# our own code
|
25
178
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: beerdb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.1
|
4
|
+
version: 0.9.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2014-01-
|
12
|
+
date: 2014-01-23 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: activerecord
|
16
|
-
requirement: &
|
16
|
+
requirement: &20877528 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '3.2'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *20877528
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: worlddb
|
27
|
-
requirement: &
|
27
|
+
requirement: &20877180 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '1.8'
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *20877180
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: gli
|
38
|
-
requirement: &
|
38
|
+
requirement: &20876820 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: 2.5.6
|
44
44
|
type: :runtime
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *20876820
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: rdoc
|
49
|
-
requirement: &
|
49
|
+
requirement: &20876532 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ~>
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: '4.0'
|
55
55
|
type: :development
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *20876532
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: hoe
|
60
|
-
requirement: &
|
60
|
+
requirement: &20876208 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ~>
|
@@ -65,7 +65,7 @@ dependencies:
|
|
65
65
|
version: '3.7'
|
66
66
|
type: :development
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *20876208
|
69
69
|
description: beerdb - beer.db command line tool
|
70
70
|
email: beerdb@googlegroups.com
|
71
71
|
executables:
|