food_ingredient_parser 1.0.0.pre.5 → 1.0.0.pre.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +44 -8
- data/bin/food_ingredient_parser +13 -5
- data/lib/food_ingredient_parser/cleaner.rb +16 -0
- data/lib/food_ingredient_parser/loose/node.rb +60 -0
- data/lib/food_ingredient_parser/loose/parser.rb +24 -0
- data/lib/food_ingredient_parser/loose/scanner.rb +191 -0
- data/lib/food_ingredient_parser/loose/transform/amount.rb +70 -0
- data/lib/food_ingredient_parser/loose/transform/amount_from_name.treetop +13 -0
- data/lib/food_ingredient_parser/{grammar → strict/grammar}/amount.treetop +6 -5
- data/lib/food_ingredient_parser/{grammar → strict/grammar}/common.treetop +1 -1
- data/lib/food_ingredient_parser/{grammar → strict/grammar}/ingredient.treetop +1 -1
- data/lib/food_ingredient_parser/{grammar → strict/grammar}/ingredient_coloned.treetop +1 -1
- data/lib/food_ingredient_parser/{grammar → strict/grammar}/ingredient_nested.treetop +1 -1
- data/lib/food_ingredient_parser/{grammar → strict/grammar}/ingredient_simple.treetop +1 -1
- data/lib/food_ingredient_parser/{grammar → strict/grammar}/list.treetop +1 -1
- data/lib/food_ingredient_parser/{grammar → strict/grammar}/list_coloned.treetop +1 -1
- data/lib/food_ingredient_parser/{grammar → strict/grammar}/list_newlined.treetop +1 -1
- data/lib/food_ingredient_parser/{grammar → strict/grammar}/root.treetop +1 -1
- data/lib/food_ingredient_parser/strict/nodes.rb +74 -0
- data/lib/food_ingredient_parser/{parser.rb → strict/parser.rb} +3 -15
- data/lib/food_ingredient_parser/strict/to_html.rb +54 -0
- data/lib/food_ingredient_parser/version.rb +2 -2
- data/lib/food_ingredient_parser.rb +2 -1
- metadata +22 -16
- data/lib/food_ingredient_parser/nodes.rb +0 -72
- data/lib/food_ingredient_parser/to_html.rb +0 -52
- /data/lib/food_ingredient_parser/{grammar.rb → strict/grammar.rb} +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 54bdb9187f9a2dfbec67737ddc2a3ad90f4ca058
|
4
|
+
data.tar.gz: fcfc99674e0f58801ca3a375acebe91ba3f80c84
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 773526e862a74f04614486f3542de0c89f8663e10f959978a7d3a2e1ba8703e8a9ae93bde6b043a62326be68956733a30e6c01d85b6578dba0abc9064590fb18
|
7
|
+
data.tar.gz: e3296ae3222745f20727eed70bd1a9dac84a1714f1bdc64c2cd524bfbd6589a470afea33eab1e4ce717dbec9f4b3b3d8d9350025d13a4046e78cea3d0aea9b6d
|
data/README.md
CHANGED
@@ -22,11 +22,11 @@ require 'food_ingredient_parser'
|
|
22
22
|
s = "Water* 60%, suiker 30%, voedingszuren: citroenzuur, appelzuur, zuurteregelaar: E576/E577, " \
|
23
23
|
+ "natuurlijke citroen-limoen aroma's 0,2%, zoetstof: steviolglycosiden, * = Biologisch. " \
|
24
24
|
+ "E = door de E.U. goedgekeurde toevoeging."
|
25
|
-
parser = FoodIngredientParser::Parser.new
|
25
|
+
parser = FoodIngredientParser::Strict::Parser.new
|
26
26
|
puts parser.parse(s).to_h.inspect
|
27
27
|
```
|
28
28
|
Results in
|
29
|
-
```
|
29
|
+
```ruby
|
30
30
|
{
|
31
31
|
:contains=>[
|
32
32
|
{:name=>"Water", :amount=>"60%", :mark=>"*"},
|
@@ -58,14 +58,15 @@ running this from the source tree, use `bin/food_ingredient_parser` instead.
|
|
58
58
|
|
59
59
|
```
|
60
60
|
$ food_ingredient_parser -h
|
61
|
-
Usage: food_ingredient_parser [options] --file|-f <filename>
|
62
|
-
food_ingredient_parser [options] --string|-s <ingredients>
|
61
|
+
Usage: bin/food_ingredient_parser [options] --file|-f <filename>
|
62
|
+
bin/food_ingredient_parser [options] --string|-s <ingredients>
|
63
63
|
|
64
64
|
-f, --file FILE Parse all lines of the file as ingredient lists.
|
65
65
|
-s, --string INGREDIENTS Parse specified ingredient list.
|
66
66
|
-q, --[no-]quiet Only show summary.
|
67
67
|
-p, --parsed Only show lines that were successfully parsed.
|
68
|
-
-
|
68
|
+
-r, --parser PARSER Use specific parser (strict, loose).
|
69
|
+
-e, --[no-]escape Escape newlines
|
69
70
|
-c, --[no-]color Use color
|
70
71
|
-n, --noresult Only show lines that had no result.
|
71
72
|
-v, --[no-]verbose Show more data (parsed tree).
|
@@ -102,6 +103,12 @@ RootNode+Root3 offset=0, "tomato" (contains,notes):
|
|
102
103
|
SyntaxNode offset=6, ""
|
103
104
|
{:contains=>[{:name=>"tomato"}]}
|
104
105
|
|
106
|
+
$ food_ingredient_parser -v -r loose -s "tomato"
|
107
|
+
"tomato"
|
108
|
+
Node interval=0..5
|
109
|
+
Node interval=0..5, name="tomato"
|
110
|
+
{:contains=>[{:name=>"tomato"}]}
|
111
|
+
|
105
112
|
$ food_ingredient_parser -q -f data/test-cases
|
106
113
|
parsed 35 (100.0%), no result 0 (0.0%)
|
107
114
|
```
|
@@ -114,12 +121,12 @@ When ingredient lists are entered manually, it can be very useful to show how th
|
|
114
121
|
recognized. This can help understanding why a certain ingredients list cannot be parsed.
|
115
122
|
|
116
123
|
For this you can use the `to_html` method on the parsed output, which returns the original
|
117
|
-
text, augmented with CSS classes for different parts.
|
124
|
+
text, augmented with CSS classes for different parts. (Available for the strict parser only.)
|
118
125
|
|
119
126
|
```ruby
|
120
127
|
require 'food_ingredient_parser'
|
121
128
|
|
122
|
-
parsed = FoodIngredientParser::Parser.new.parse("Saus (10% tomaat*, zout). * = bio")
|
129
|
+
parsed = FoodIngredientParser::Strict::Parser.new.parse("Saus (10% tomaat*, zout). * = bio")
|
123
130
|
puts parsed.to_html
|
124
131
|
```
|
125
132
|
|
@@ -138,9 +145,38 @@ For an example of an interactive editor, see [examples/editor.rb](examples/edito
|
|
138
145
|
|
139
146
|
![editor example screenshot](examples/editor-screenshot.png)
|
140
147
|
|
148
|
+
## Loose parser
|
149
|
+
|
150
|
+
The strict parser only parses ingredient lists that conform to one of the many different
|
151
|
+
formats expected. If you'd like to always get a result, even if it is not necessarily
|
152
|
+
completely correct, you can use the _loose_ parser. This does not use Treetop, but looks
|
153
|
+
at the input character by character and tries to make the best of it. Nevertheless, if you
|
154
|
+
just want to have _some_ result, this can still be very useful.
|
155
|
+
|
156
|
+
```ruby
|
157
|
+
require 'food_ingredient_parser'
|
158
|
+
|
159
|
+
parsed = FoodIngredientParser::Loose::Parser.new.parse("Saus [10% tomaat*, (zout); peper.")
|
160
|
+
puts parsed.to_h
|
161
|
+
```
|
162
|
+
|
163
|
+
Even though the strict parser would not give a result, the loose parser returns:
|
164
|
+
```ruby
|
165
|
+
{
|
166
|
+
:contains=>[
|
167
|
+
{:name=>"Saus", :contains=>[
|
168
|
+
{:name=>"tomaat", :mark=>"*", :amount=>"10%"},
|
169
|
+
{:contains=>[{:name=>"zout"}]},
|
170
|
+
{:name=>"peper"}
|
171
|
+
]}
|
172
|
+
]
|
173
|
+
}
|
174
|
+
```
|
175
|
+
|
141
176
|
## Test data
|
142
177
|
|
143
178
|
[`data/ingredient-samples-nl`](data/ingredient-samples-nl) contains about 150k
|
144
179
|
real-world ingredient lists found on the Dutch market. Each line contains one ingredient
|
145
180
|
list (newlines are encoded as `\n`, empty lines and those starting with `#` are ignored).
|
146
|
-
|
181
|
+
The strict parser currently parses about three quarters, while the loose parser returns
|
182
|
+
something for all of them.
|
data/bin/food_ingredient_parser
CHANGED
@@ -31,8 +31,7 @@ def colorize(color, s)
|
|
31
31
|
end
|
32
32
|
end
|
33
33
|
|
34
|
-
def parse_single(s, parsed=nil, parser
|
35
|
-
parser ||= FoodIngredientParser::Parser.new
|
34
|
+
def parse_single(s, parsed=nil, parser:, verbosity: 1, print: nil, escape: false, color: false)
|
36
35
|
parsed ||= parser.parse(s)
|
37
36
|
|
38
37
|
return unless print.nil? || (parsed && print == :parsed) || (!parsed && print == :noresult)
|
@@ -47,7 +46,7 @@ def parse_single(s, parsed=nil, parser: nil, verbosity: 1, print: nil, escape: f
|
|
47
46
|
end
|
48
47
|
end
|
49
48
|
|
50
|
-
def parse_file(path, parser
|
49
|
+
def parse_file(path, parser:, verbosity: 1, print: nil, escape: false, color: false)
|
51
50
|
count_parsed = count_noresult = 0
|
52
51
|
File.foreach(path) do |line|
|
53
52
|
next if line =~ /^#/ # comment
|
@@ -70,8 +69,13 @@ verbosity = 1
|
|
70
69
|
files = []
|
71
70
|
strings = []
|
72
71
|
print = nil
|
72
|
+
parser_name = :strict
|
73
73
|
escape = false
|
74
74
|
color = true
|
75
|
+
PARSERS = {
|
76
|
+
strict: FoodIngredientParser::Strict::Parser,
|
77
|
+
loose: FoodIngredientParser::Loose::Parser
|
78
|
+
}
|
75
79
|
OptionParser.new do |opts|
|
76
80
|
opts.banner = <<-EOF.gsub(/^ /, '')
|
77
81
|
Usage: #{$0} [options] --file|-f <filename>
|
@@ -84,7 +88,8 @@ OptionParser.new do |opts|
|
|
84
88
|
|
85
89
|
opts.on("-q", "--[no-]quiet", "Only show summary.") {|q| verbosity = q ? 0 : 1 }
|
86
90
|
opts.on("-p", "--parsed", "Only show lines that were successfully parsed.") {|p| print = :parsed }
|
87
|
-
opts.on("-
|
91
|
+
opts.on("-r", "--parser PARSER", "Use specific parser (#{PARSERS.keys.join(", ")}).") {|p| parser_name = p&.downcase&.to_sym }
|
92
|
+
opts.on("-e", "--[no-]escape", "Escape newlines") {|e| escape = !!e }
|
88
93
|
opts.on("-c", "--[no-]color", "Use color") {|e| color = !!e }
|
89
94
|
opts.on("-n", "--noresult", "Only show lines that had no result.") {|p| print = :noresult }
|
90
95
|
opts.on("-v", "--[no-]verbose", "Show more data (parsed tree).") {|v| verbosity = v ? 2 : 1 }
|
@@ -99,7 +104,10 @@ OptionParser.new do |opts|
|
|
99
104
|
end.parse!
|
100
105
|
|
101
106
|
if strings.any? || files.any?
|
102
|
-
parser =
|
107
|
+
unless parser = PARSERS[parser_name]&.new
|
108
|
+
STDERR.puts("Please specify one of the known parsers: #{PARSERS.keys.join(", ")}.")
|
109
|
+
exit(1)
|
110
|
+
end
|
103
111
|
strings.each {|s| parse_single(s, parser: parser, verbosity: verbosity, print: print, escape: escape, color: color) }
|
104
112
|
files.each {|f| parse_file(f, parser: parser, verbosity: verbosity, print: print, escape: escape, color: color) }
|
105
113
|
else
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module FoodIngredientParser
|
2
|
+
module Cleaner
|
3
|
+
|
4
|
+
def self.clean(s)
|
5
|
+
s.gsub!("\u00ad", "") # strip soft hyphen
|
6
|
+
s.gsub!("\u0092", "'") # windows-1252 apostrophe - https://stackoverflow.com/a/15564279/2866660
|
7
|
+
s.gsub!("aÄs", "aïs") # encoding issue for maïs
|
8
|
+
s.gsub!("ï", "ï") # encoding issue
|
9
|
+
s.gsub!("ë", "ë") # encoding issue
|
10
|
+
s.gsub!(/\A\s*"(.*)"\s*\z/, '\1') # enclosing double quotation marks
|
11
|
+
s.gsub!(/\A\s*'(.*)'\s*\z/, '\1') # enclosing single quotation marks
|
12
|
+
s
|
13
|
+
end
|
14
|
+
|
15
|
+
end
|
16
|
+
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
module FoodIngredientParser::Loose
|
2
|
+
# Parsing result.
|
3
|
+
class Node
|
4
|
+
attr_accessor :name, :mark, :amount, :contains, :notes
|
5
|
+
attr_reader :input, :interval, :auto_close
|
6
|
+
|
7
|
+
def initialize(input, interval, auto_close: false)
|
8
|
+
@input = input
|
9
|
+
@interval = interval.is_a?(Range) ? interval : ( interval .. interval )
|
10
|
+
@auto_close = auto_close
|
11
|
+
@contains = []
|
12
|
+
@notes = []
|
13
|
+
@name = @mark = @amount = nil
|
14
|
+
end
|
15
|
+
|
16
|
+
def ends(index)
|
17
|
+
@interval = @interval.first .. index
|
18
|
+
end
|
19
|
+
|
20
|
+
def <<(child)
|
21
|
+
@contains << child
|
22
|
+
end
|
23
|
+
|
24
|
+
def text_value
|
25
|
+
@input[@interval]
|
26
|
+
end
|
27
|
+
|
28
|
+
def to_h
|
29
|
+
r = {}
|
30
|
+
r[:name] = name.text_value.strip if name && name.text_value.strip != ''
|
31
|
+
r[:mark] = mark.text_value.strip if mark
|
32
|
+
r[:amount] = amount.text_value.strip if amount
|
33
|
+
r[:contains] = contains.map(&:to_h).reject {|c| c == {} } if contains.any?
|
34
|
+
r[:notes] = notes.map{|n| n.text_value.strip }.reject {|c| c == '' } if notes.any?
|
35
|
+
r
|
36
|
+
end
|
37
|
+
|
38
|
+
def inspect(indent="", variant="")
|
39
|
+
inspect_self(indent, variant) +
|
40
|
+
inspect_children(indent)
|
41
|
+
end
|
42
|
+
|
43
|
+
def inspect_self(indent="", variant="")
|
44
|
+
[
|
45
|
+
indent + "Node#{variant} interval=#{@interval}",
|
46
|
+
name ? "name=#{name.text_value.strip.inspect}" : nil,
|
47
|
+
mark ? "mark=#{mark.text_value.strip.inspect}" : nil,
|
48
|
+
amount ? "amount=#{amount.text_value.strip.inspect}" : nil,
|
49
|
+
auto_close ? "auto_close" : nil
|
50
|
+
].compact.join(", ")
|
51
|
+
end
|
52
|
+
|
53
|
+
def inspect_children(indent="")
|
54
|
+
[
|
55
|
+
*contains.map {|child| "\n" + child.inspect(indent + " ") },
|
56
|
+
*notes.map {|note| "\n" + note.inspect(indent + " ", "(note)") }
|
57
|
+
].join("")
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require_relative '../cleaner'
|
2
|
+
require_relative 'scanner'
|
3
|
+
require_relative 'transform/amount'
|
4
|
+
|
5
|
+
module FoodIngredientParser::Loose
|
6
|
+
class Parser
|
7
|
+
|
8
|
+
# Create a new food ingredient stream parser
|
9
|
+
# @return [FoodIngredientParser::Loose::Parser]
|
10
|
+
def initialize
|
11
|
+
end
|
12
|
+
|
13
|
+
# Parse food ingredient list text into a structured representation.
|
14
|
+
#
|
15
|
+
# @option clean [Boolean] pass +false+ to disable correcting frequently occurring issues
|
16
|
+
# @return [FoodIngredientParser::Loose::Node] structured representation of food ingredients
|
17
|
+
def parse(s, clean: true, **options)
|
18
|
+
s = FoodIngredientParser::Cleaner.clean(s) if clean
|
19
|
+
n = Scanner.new(s).scan
|
20
|
+
n = Transform::Amount.transform!(n) if n
|
21
|
+
n
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
@@ -0,0 +1,191 @@
|
|
1
|
+
require_relative 'node'
|
2
|
+
|
3
|
+
module FoodIngredientParser::Loose
|
4
|
+
class Scanner
|
5
|
+
|
6
|
+
SEP_CHARS = "|;,.".freeze
|
7
|
+
MARK_CHARS = "¹²³⁴⁵ᵃᵇᶜᵈᵉᶠᵍªº⁽⁾†‡•°#^*".freeze
|
8
|
+
PREFIX_RE = /\A\s*(ingredients|contains|ingred[iï][eë]nt(en)?(declaratie)?|bevat|dit zit er\s?in|samenstelling|zutaten)\s*[:;.]\s*/i.freeze
|
9
|
+
NOTE_RE = /\A\b(dit product kan\b|kan sporen\b.*?\bbevatten\b|voor allergenen\b|allergenen\b|E\s*=|gemaakt in\b|geproduceerd in\b|bevat mogelijk\b|kijk voor meer\b|allergie-info|in de fabriek\b|in dit bedrijf\b)/i.freeze
|
10
|
+
|
11
|
+
def initialize(s, index: 0)
|
12
|
+
@s = s # input string
|
13
|
+
@i = index # current index in string
|
14
|
+
@cur = nil # current node we're populating
|
15
|
+
@ancestors = [Node.new(@s, @i)] # nesting hierarchy
|
16
|
+
@iterator = :beginning # scan_iteration_<iterator> to use for parsing
|
17
|
+
@dest = :contains # append current node to this attribute on parent
|
18
|
+
end
|
19
|
+
|
20
|
+
def scan
|
21
|
+
loop do
|
22
|
+
method(:"scan_iteration_#{@iterator}").call
|
23
|
+
end
|
24
|
+
|
25
|
+
close_all_ancestors
|
26
|
+
@ancestors.first.ends(@i-1)
|
27
|
+
@ancestors.first
|
28
|
+
end
|
29
|
+
|
30
|
+
private
|
31
|
+
|
32
|
+
def loop
|
33
|
+
while @i < @s.length
|
34
|
+
@i += 1 if yield != false
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
def scan_iteration_beginning
|
39
|
+
# skip over some common prefixes
|
40
|
+
m = @s[@i .. -1].match(PREFIX_RE)
|
41
|
+
@i += m.offset(0).last if m
|
42
|
+
# now continue with the standard parsing
|
43
|
+
@iterator = :standard
|
44
|
+
false
|
45
|
+
end
|
46
|
+
|
47
|
+
def scan_iteration_standard
|
48
|
+
if "([".include?(c) # open nesting
|
49
|
+
open_parent
|
50
|
+
elsif ")]".include?(c) # close nesting
|
51
|
+
add_child
|
52
|
+
close_parent
|
53
|
+
elsif is_notes_start? # usually a dot marks the start of notes
|
54
|
+
close_all_ancestors
|
55
|
+
@iterator = :notes
|
56
|
+
@dest = :notes
|
57
|
+
elsif is_sep? # separator
|
58
|
+
add_child
|
59
|
+
elsif ":".include?(c) # another open nesting
|
60
|
+
add_child
|
61
|
+
open_parent(auto_close: true)
|
62
|
+
@iterator = :colon
|
63
|
+
elsif is_mark? && !cur.mark # mark after ingredient
|
64
|
+
name_until_here
|
65
|
+
len = mark_len
|
66
|
+
cur.mark = Node.new(@s, @i .. @i+len-1)
|
67
|
+
@i += len - 1
|
68
|
+
else
|
69
|
+
cur # reference to record starting position
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
def scan_iteration_colon
|
74
|
+
if "/".include?(c) # slash separator in colon nesting only
|
75
|
+
add_child
|
76
|
+
elsif is_sep? # regular separator indicates end of colon nesting
|
77
|
+
add_child
|
78
|
+
close_parent
|
79
|
+
# revert to standard parsing from here on
|
80
|
+
@iterator = :standard
|
81
|
+
scan_iteration_standard
|
82
|
+
elsif "([]):".include?(c) # continue with deeper nesting level
|
83
|
+
# revert to standard parsing from here on
|
84
|
+
@iterator = :standard
|
85
|
+
scan_iteration_standard
|
86
|
+
else
|
87
|
+
# normal handling for this character
|
88
|
+
scan_iteration_standard
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
def scan_iteration_notes
|
93
|
+
if is_sep?(chars: ".") # dot means new note
|
94
|
+
add_child
|
95
|
+
else
|
96
|
+
cur
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
100
|
+
def c
|
101
|
+
@s[@i]
|
102
|
+
end
|
103
|
+
|
104
|
+
def parent
|
105
|
+
@ancestors.last
|
106
|
+
end
|
107
|
+
|
108
|
+
def cur
|
109
|
+
@cur ||= Node.new(@s, @i)
|
110
|
+
end
|
111
|
+
|
112
|
+
def is_mark?
|
113
|
+
mark_len > 0 && @s[@i..@i+1] !~ /\A°[CF]/
|
114
|
+
end
|
115
|
+
|
116
|
+
def is_sep?(chars: SEP_CHARS)
|
117
|
+
chars.include?(c) && @s[@i-1..@i+1] !~ /\A\d.\d\z/
|
118
|
+
end
|
119
|
+
|
120
|
+
def mark_len
|
121
|
+
i = @i
|
122
|
+
while @s[i] && MARK_CHARS.include?(@s[i])
|
123
|
+
i += 1
|
124
|
+
end
|
125
|
+
i - @i
|
126
|
+
end
|
127
|
+
|
128
|
+
def is_notes_start?
|
129
|
+
# @todo use more heuristics: don't assume dot is notes when separator is a dot, and only toplevel?
|
130
|
+
if ( is_mark? && @s[@i+mark_len..-1] =~ /\A\s*=/ ) || # "* = Biologisch"
|
131
|
+
( is_mark? && @s[@i-2..@i-1] =~ /\A\s\s/ ) || # " **Biologisch"
|
132
|
+
( @s[@i..-1] =~ NOTE_RE ) # "E=", "Kan sporen van", ...
|
133
|
+
@i -= 1 # we want to include the mark in the note
|
134
|
+
true
|
135
|
+
# End of sentence
|
136
|
+
elsif dot_is_not_sep? && is_sep?(chars: ".")
|
137
|
+
true
|
138
|
+
else
|
139
|
+
false
|
140
|
+
end
|
141
|
+
end
|
142
|
+
|
143
|
+
def add_child
|
144
|
+
cur.ends(@i-1)
|
145
|
+
cur.name ||= Node.new(@s, cur.interval)
|
146
|
+
parent.send(@dest) << cur
|
147
|
+
@cur = nil
|
148
|
+
end
|
149
|
+
|
150
|
+
def open_parent(**options)
|
151
|
+
name_until_here
|
152
|
+
@ancestors << cur
|
153
|
+
@cur = Node.new(@s, @i + 1, **options)
|
154
|
+
end
|
155
|
+
|
156
|
+
def close_parent
|
157
|
+
return unless @ancestors.count > 1
|
158
|
+
@cur = @ancestors.pop
|
159
|
+
while @cur.auto_close
|
160
|
+
add_child
|
161
|
+
@cur = @ancestors.pop
|
162
|
+
end
|
163
|
+
end
|
164
|
+
|
165
|
+
def close_all_ancestors
|
166
|
+
while @ancestors.count > 1
|
167
|
+
add_child
|
168
|
+
close_parent
|
169
|
+
end
|
170
|
+
add_child
|
171
|
+
end
|
172
|
+
|
173
|
+
def name_until_here
|
174
|
+
cur.name ||= Node.new(@s, cur.interval.first .. @i-1)
|
175
|
+
end
|
176
|
+
|
177
|
+
def dot_is_not_sep?
|
178
|
+
# if separator is dot ".", don't use it for note detection
|
179
|
+
if @dot_is_not_sep.nil?
|
180
|
+
@dot_is_not_sep = begin
|
181
|
+
# @todo if another separator is found more often, dot is not a separator
|
182
|
+
num_words = @s.split(/\s+/).count
|
183
|
+
num_dots = @s.count(".")
|
184
|
+
# heuristic: dot is a separator when 1/4+ of the words have a dot, with at least five words (negated below)
|
185
|
+
num_words < 5 || 4 * num_dots < num_words
|
186
|
+
end
|
187
|
+
end
|
188
|
+
@dot_is_not_sep
|
189
|
+
end
|
190
|
+
end
|
191
|
+
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
require 'treetop'
|
2
|
+
require_relative '../../strict/nodes'
|
3
|
+
Treetop.load File.dirname(__FILE__) + '/../../strict/grammar/common'
|
4
|
+
Treetop.load File.dirname(__FILE__) + '/../../strict/grammar/amount'
|
5
|
+
Treetop.load File.dirname(__FILE__) + '/amount_from_name'
|
6
|
+
|
7
|
+
require_relative '../node'
|
8
|
+
|
9
|
+
module FoodIngredientParser::Loose
|
10
|
+
module Transform
|
11
|
+
# Transforms node tree to extract amount into its own attribute.
|
12
|
+
class Amount
|
13
|
+
def self.transform!(node)
|
14
|
+
new(node).transform!
|
15
|
+
end
|
16
|
+
|
17
|
+
def initialize(node)
|
18
|
+
@node = node
|
19
|
+
@parser = FoodIngredientParser::Loose::Transform::AmountFromNameParser.new
|
20
|
+
end
|
21
|
+
|
22
|
+
def transform!
|
23
|
+
transform_name
|
24
|
+
transform_contains
|
25
|
+
@node
|
26
|
+
end
|
27
|
+
|
28
|
+
private
|
29
|
+
|
30
|
+
# Extract amount from name, if any.
|
31
|
+
def transform_name(node = @node)
|
32
|
+
if !node.amount && parsed = parse_amount(node.name&.text_value)
|
33
|
+
offset = node.name.interval.first
|
34
|
+
|
35
|
+
amount = parsed.amount.amount
|
36
|
+
node.amount = Node.new(node.input, offset + amount.interval.first .. offset + amount.interval.last - 1)
|
37
|
+
|
38
|
+
name = parsed.respond_to?(:name) && parsed.name
|
39
|
+
if name && name.interval.count > 0
|
40
|
+
node.name = Node.new(node.input, offset + name.interval.first .. offset + name.interval.last - 1)
|
41
|
+
else
|
42
|
+
node.name = nil
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
# recursively transform contained nodes
|
47
|
+
node.contains&.each(&method(:transform_name))
|
48
|
+
end
|
49
|
+
|
50
|
+
# If first or last child is an amount, it's this node's amount.
|
51
|
+
# Assumes amounts have already been extracted from all names with {#transform_name}.
|
52
|
+
def transform_contains(node = @node)
|
53
|
+
if !node.amount && node.contains.any?
|
54
|
+
if node.contains.first.name.nil? && node.contains.first.amount
|
55
|
+
node.amount = node.contains.shift.amount
|
56
|
+
elsif node.contains.count > 1 && node.contains.last.name.nil? && node.contains.last.amount
|
57
|
+
node.amount = node.contains.pop.amount
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
# recursively transform contained nodes
|
62
|
+
node.contains.each(&method(:transform_contains))
|
63
|
+
end
|
64
|
+
|
65
|
+
def parse_amount(s)
|
66
|
+
@parser.parse(s) if s && s.strip != ''
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
module FoodIngredientParser::Loose::Transform
|
2
|
+
grammar AmountFromName
|
3
|
+
include FoodIngredientParser::Strict::Grammar::Common
|
4
|
+
include FoodIngredientParser::Strict::Grammar::Amount
|
5
|
+
|
6
|
+
rule amount_from_name
|
7
|
+
# just amount, amount in front or at the end
|
8
|
+
ws* amount:amount ws+ name:(.*) /
|
9
|
+
ws* amount:amount ws* /
|
10
|
+
ws* name:( !amount word ( ws+ !amount word )* )+ ws* amount:amount ws*
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
@@ -1,4 +1,4 @@
|
|
1
|
-
module FoodIngredientParser::Grammar
|
1
|
+
module FoodIngredientParser::Strict::Grammar
|
2
2
|
grammar Amount
|
3
3
|
include Common
|
4
4
|
|
@@ -12,18 +12,19 @@ module FoodIngredientParser::Grammar
|
|
12
12
|
rule simple_amount
|
13
13
|
( (
|
14
14
|
'of which'i / 'at least'i / 'minimal'i / 'maximal'i / 'less than'i / 'more than'i /
|
15
|
-
'waarvan'i / 'ten minste'i / 'tenminste'i / 'minimaal'i / 'maximaal'i / 'minder dan'i / 'meer dan'i
|
15
|
+
'waarvan'i / 'ten minste'i / 'tenminste'i / 'minimaal'i / 'maximaal'i / 'minder dan'i / 'meer dan'i /
|
16
|
+
'min.'i / 'min'i / 'max.'i / 'max'i
|
16
17
|
) ws* )?
|
17
18
|
[±∓~∼∽≂≃≈≲≤<>≥≳]? ws*
|
18
19
|
simple_amount_quantity
|
19
20
|
( ws+ (
|
20
|
-
'minimum'i /
|
21
|
-
'
|
21
|
+
'minimaal'i / 'minimum'i / 'van het uitlekgewicht'i / 'van het geheel'i /
|
22
|
+
'min.'i / 'min'i / 'max.'i / 'max'i
|
22
23
|
) )?
|
23
24
|
end
|
24
25
|
|
25
26
|
rule simple_amount_quantity
|
26
|
-
number ( ws* '-' ws* number )? ws* ( '
|
27
|
+
number ( ws* '-' ws* number )? ws* ( [%٪⁒%﹪] / ( ( 'procent' / 'percent' / 'gram'i / 'ml'i / 'mg'i / 'g'i ) !char ) )
|
27
28
|
end
|
28
29
|
|
29
30
|
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
require 'treetop/runtime'
|
2
|
+
require_relative 'to_html'
|
3
|
+
|
4
|
+
# Needs to be in grammar namespace so Treetop can find the nodes.
|
5
|
+
module FoodIngredientParser::Strict
|
6
|
+
module Grammar
|
7
|
+
|
8
|
+
# Treetop syntax node with our additions, use this as parent for all our own nodes.
|
9
|
+
class SyntaxNode < Treetop::Runtime::SyntaxNode
|
10
|
+
private
|
11
|
+
|
12
|
+
def to_a_deep(n, cls)
|
13
|
+
if n.is_a?(cls)
|
14
|
+
[n]
|
15
|
+
elsif n.nonterminal?
|
16
|
+
n.elements.map {|m| to_a_deep(m, cls) }.flatten(1).compact
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
# Root object, contains everything else.
|
22
|
+
class RootNode < SyntaxNode
|
23
|
+
include FoodIngredientParser::Strict::ToHtml
|
24
|
+
|
25
|
+
def to_h
|
26
|
+
h = { contains: contains.to_a }
|
27
|
+
if notes && notes_ary = to_a_deep(notes, NoteNode)&.map(&:text_value)
|
28
|
+
h[:notes] = notes_ary if notes_ary.length > 0
|
29
|
+
end
|
30
|
+
h
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
# List of ingredients.
|
35
|
+
class ListNode < SyntaxNode
|
36
|
+
def to_a
|
37
|
+
to_a_deep(contains, IngredientNode).map(&:to_h)
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
# Ingredient
|
42
|
+
class IngredientNode < SyntaxNode
|
43
|
+
def to_h
|
44
|
+
h = {}
|
45
|
+
h.merge!(to_a_deep(ing, IngredientNode)&.first&.to_h || {}) if respond_to?(:ing)
|
46
|
+
h.merge!(to_a_deep(amount, AmountNode)&.first&.to_h || {}) if respond_to?(:amount)
|
47
|
+
h[:name] = name.text_value if respond_to?(:name)
|
48
|
+
h[:name] = pre.text_value + h[:name] if respond_to?(:pre)
|
49
|
+
h[:name] = h[:name] + post.text_value if respond_to?(:post)
|
50
|
+
h[:mark] = mark.text_value if respond_to?(:mark) && mark.text_value != ''
|
51
|
+
h
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
# Ingredient with containing ingredients.
|
56
|
+
class NestedIngredientNode < IngredientNode
|
57
|
+
def to_h
|
58
|
+
super.merge({ contains: to_a_deep(contains, IngredientNode).map(&:to_h) })
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
# Amount, specifying an ingredient.
|
63
|
+
class AmountNode < SyntaxNode
|
64
|
+
def to_h
|
65
|
+
{ amount: amount.text_value }
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
# Note at the end of the ingredient list.
|
70
|
+
class NoteNode < SyntaxNode
|
71
|
+
end
|
72
|
+
|
73
|
+
end
|
74
|
+
end
|
@@ -1,6 +1,7 @@
|
|
1
1
|
require_relative 'grammar'
|
2
|
+
require_relative '../cleaner'
|
2
3
|
|
3
|
-
module FoodIngredientParser
|
4
|
+
module FoodIngredientParser::Strict
|
4
5
|
class Parser
|
5
6
|
|
6
7
|
# @!attribute [r] parser
|
@@ -20,22 +21,9 @@ module FoodIngredientParser
|
|
20
21
|
# @return [FoodIngredientParser::Strict::Grammar::RootNode] structured representation of food ingredients
|
21
22
|
# @note Unrecognized options are passed to Treetop, but this is not guaranteed to remain so forever.
|
22
23
|
def parse(s, clean: true, **options)
|
23
|
-
s = clean(s) if clean
|
24
|
+
s = FoodIngredientParser::Cleaner.clean(s) if clean
|
24
25
|
@parser.parse(s, **options)
|
25
26
|
end
|
26
27
|
|
27
|
-
private
|
28
|
-
|
29
|
-
def clean(s)
|
30
|
-
s.gsub!("\u00ad", "") # strip soft hyphen
|
31
|
-
s.gsub!("\u0092", "'") # windows-1252 apostrophe - https://stackoverflow.com/a/15564279/2866660
|
32
|
-
s.gsub!("aÄs", "aïs") # encoding issue for maïs
|
33
|
-
s.gsub!("ï", "ï") # encoding issue
|
34
|
-
s.gsub!("ë", "ë") # encoding issue
|
35
|
-
s.gsub!(/\A\s*"(.*)"\s*\z/, '\1') # enclosing double quotation marks
|
36
|
-
s.gsub!(/\A\s*'(.*)'\s*\z/, '\1') # enclosing single quotation marks
|
37
|
-
s
|
38
|
-
end
|
39
|
-
|
40
28
|
end
|
41
29
|
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'cgi'
|
2
|
+
|
3
|
+
# Adds HTML output functionality to a Treetop Node.
|
4
|
+
#
|
5
|
+
# The node needs to provide a {#to_h} method (for {#to_html_h}).
|
6
|
+
#
|
7
|
+
module FoodIngredientParser::Strict
|
8
|
+
module ToHtml
|
9
|
+
|
10
|
+
# Markup original ingredients list text in HTML.
|
11
|
+
#
|
12
|
+
# The input text is returned as HTML, augmented with CSS classes
|
13
|
+
# on +span+s for +name+, +amount+, +mark+ and +note+.
|
14
|
+
#
|
15
|
+
# @return [String] HTML representation of ingredient list.
|
16
|
+
def to_html
|
17
|
+
node_to_html(self)
|
18
|
+
end
|
19
|
+
|
20
|
+
private
|
21
|
+
|
22
|
+
def node_to_html(node, cls=nil, depth=0)
|
23
|
+
el_cls = {} # map of node instances to class names for contained elements
|
24
|
+
terminal = node.terminal? # whether to look at children elements or not
|
25
|
+
|
26
|
+
if node.is_a?(FoodIngredientParser::Strict::Grammar::AmountNode)
|
27
|
+
cls ||= "amount"
|
28
|
+
elsif node.is_a?(FoodIngredientParser::Strict::Grammar::NoteNode)
|
29
|
+
cls ||= "note"
|
30
|
+
terminal = true # NoteNodes may contain other NoteNodes, we want it flat.
|
31
|
+
elsif node.is_a?(FoodIngredientParser::Strict::Grammar::IngredientNode)
|
32
|
+
el_cls[node.name] = "name" if node.respond_to?(:name)
|
33
|
+
el_cls[node.mark] = "mark" if node.respond_to?(:mark)
|
34
|
+
if node.respond_to?(:contains)
|
35
|
+
el_cls[node.contains] = "contains depth#{depth}"
|
36
|
+
depth += 1
|
37
|
+
end
|
38
|
+
elsif node.is_a?(FoodIngredientParser::Strict::Grammar::RootNode)
|
39
|
+
if node.respond_to?(:contains)
|
40
|
+
el_cls[node.contains] = "depth#{depth}"
|
41
|
+
depth += 1
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
val = if terminal
|
46
|
+
CGI.escapeHTML(node.text_value)
|
47
|
+
else
|
48
|
+
node.elements.map {|el| node_to_html(el, el_cls[el], depth) }.join("")
|
49
|
+
end
|
50
|
+
|
51
|
+
cls ? "<span class='#{cls}'>#{val}</span>" : val
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: food_ingredient_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0.pre.
|
4
|
+
version: 1.0.0.pre.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- wvengen
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-09-
|
11
|
+
date: 2018-09-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: treetop
|
@@ -42,20 +42,26 @@ files:
|
|
42
42
|
- bin/food_ingredient_parser
|
43
43
|
- food_ingredient_parser.gemspec
|
44
44
|
- lib/food_ingredient_parser.rb
|
45
|
-
- lib/food_ingredient_parser/
|
46
|
-
- lib/food_ingredient_parser/
|
47
|
-
- lib/food_ingredient_parser/
|
48
|
-
- lib/food_ingredient_parser/
|
49
|
-
- lib/food_ingredient_parser/
|
50
|
-
- lib/food_ingredient_parser/
|
51
|
-
- lib/food_ingredient_parser/grammar
|
52
|
-
- lib/food_ingredient_parser/grammar/
|
53
|
-
- lib/food_ingredient_parser/grammar/
|
54
|
-
- lib/food_ingredient_parser/grammar/
|
55
|
-
- lib/food_ingredient_parser/grammar/
|
56
|
-
- lib/food_ingredient_parser/
|
57
|
-
- lib/food_ingredient_parser/
|
58
|
-
- lib/food_ingredient_parser/
|
45
|
+
- lib/food_ingredient_parser/cleaner.rb
|
46
|
+
- lib/food_ingredient_parser/loose/node.rb
|
47
|
+
- lib/food_ingredient_parser/loose/parser.rb
|
48
|
+
- lib/food_ingredient_parser/loose/scanner.rb
|
49
|
+
- lib/food_ingredient_parser/loose/transform/amount.rb
|
50
|
+
- lib/food_ingredient_parser/loose/transform/amount_from_name.treetop
|
51
|
+
- lib/food_ingredient_parser/strict/grammar.rb
|
52
|
+
- lib/food_ingredient_parser/strict/grammar/amount.treetop
|
53
|
+
- lib/food_ingredient_parser/strict/grammar/common.treetop
|
54
|
+
- lib/food_ingredient_parser/strict/grammar/ingredient.treetop
|
55
|
+
- lib/food_ingredient_parser/strict/grammar/ingredient_coloned.treetop
|
56
|
+
- lib/food_ingredient_parser/strict/grammar/ingredient_nested.treetop
|
57
|
+
- lib/food_ingredient_parser/strict/grammar/ingredient_simple.treetop
|
58
|
+
- lib/food_ingredient_parser/strict/grammar/list.treetop
|
59
|
+
- lib/food_ingredient_parser/strict/grammar/list_coloned.treetop
|
60
|
+
- lib/food_ingredient_parser/strict/grammar/list_newlined.treetop
|
61
|
+
- lib/food_ingredient_parser/strict/grammar/root.treetop
|
62
|
+
- lib/food_ingredient_parser/strict/nodes.rb
|
63
|
+
- lib/food_ingredient_parser/strict/parser.rb
|
64
|
+
- lib/food_ingredient_parser/strict/to_html.rb
|
59
65
|
- lib/food_ingredient_parser/version.rb
|
60
66
|
homepage: https://github.com/q-m/food-ingredient-parser-ruby
|
61
67
|
licenses:
|
@@ -1,72 +0,0 @@
|
|
1
|
-
require 'treetop/runtime'
|
2
|
-
require_relative 'to_html'
|
3
|
-
|
4
|
-
# Needs to be in grammar namespace so Treetop can find the nodes.
|
5
|
-
module FoodIngredientParser::Grammar
|
6
|
-
|
7
|
-
# Treetop syntax node with our additions, use this as parent for all our own nodes.
|
8
|
-
class SyntaxNode < Treetop::Runtime::SyntaxNode
|
9
|
-
private
|
10
|
-
|
11
|
-
def to_a_deep(n, cls)
|
12
|
-
if n.is_a?(cls)
|
13
|
-
[n]
|
14
|
-
elsif n.nonterminal?
|
15
|
-
n.elements.map {|m| to_a_deep(m, cls) }.flatten(1).compact
|
16
|
-
end
|
17
|
-
end
|
18
|
-
end
|
19
|
-
|
20
|
-
# Root object, contains everything else.
|
21
|
-
class RootNode < SyntaxNode
|
22
|
-
include FoodIngredientParser::ToHtml
|
23
|
-
|
24
|
-
def to_h
|
25
|
-
h = { contains: contains.to_a }
|
26
|
-
if notes && notes_ary = to_a_deep(notes, NoteNode)&.map(&:text_value)
|
27
|
-
h[:notes] = notes_ary if notes_ary.length > 0
|
28
|
-
end
|
29
|
-
h
|
30
|
-
end
|
31
|
-
end
|
32
|
-
|
33
|
-
# List of ingredients.
|
34
|
-
class ListNode < SyntaxNode
|
35
|
-
def to_a
|
36
|
-
to_a_deep(contains, IngredientNode).map(&:to_h)
|
37
|
-
end
|
38
|
-
end
|
39
|
-
|
40
|
-
# Ingredient
|
41
|
-
class IngredientNode < SyntaxNode
|
42
|
-
def to_h
|
43
|
-
h = {}
|
44
|
-
h.merge!(to_a_deep(ing, IngredientNode)&.first&.to_h || {}) if respond_to?(:ing)
|
45
|
-
h.merge!(to_a_deep(amount, AmountNode)&.first&.to_h || {}) if respond_to?(:amount)
|
46
|
-
h[:name] = name.text_value if respond_to?(:name)
|
47
|
-
h[:name] = pre.text_value + h[:name] if respond_to?(:pre)
|
48
|
-
h[:name] = h[:name] + post.text_value if respond_to?(:post)
|
49
|
-
h[:mark] = mark.text_value if respond_to?(:mark) && mark.text_value != ''
|
50
|
-
h
|
51
|
-
end
|
52
|
-
end
|
53
|
-
|
54
|
-
# Ingredient with containing ingredients.
|
55
|
-
class NestedIngredientNode < IngredientNode
|
56
|
-
def to_h
|
57
|
-
super.merge({ contains: to_a_deep(contains, IngredientNode).map(&:to_h) })
|
58
|
-
end
|
59
|
-
end
|
60
|
-
|
61
|
-
# Amount, specifying an ingredient.
|
62
|
-
class AmountNode < SyntaxNode
|
63
|
-
def to_h
|
64
|
-
{ amount: amount.text_value }
|
65
|
-
end
|
66
|
-
end
|
67
|
-
|
68
|
-
# Note at the end of the ingredient list.
|
69
|
-
class NoteNode < SyntaxNode
|
70
|
-
end
|
71
|
-
|
72
|
-
end
|
@@ -1,52 +0,0 @@
|
|
1
|
-
require 'cgi'
|
2
|
-
|
3
|
-
# Adds HTML output functionality to a Treetop Node.
|
4
|
-
#
|
5
|
-
# The node needs to provide a {#to_h} method (for {#to_html_h}).
|
6
|
-
#
|
7
|
-
module FoodIngredientParser::ToHtml
|
8
|
-
|
9
|
-
# Markup original ingredients list text in HTML.
|
10
|
-
#
|
11
|
-
# The input text is returned as HTML, augmented with CSS classes
|
12
|
-
# on +span+s for +name+, +amount+, +mark+ and +note+.
|
13
|
-
#
|
14
|
-
# @return [String] HTML representation of ingredient list.
|
15
|
-
def to_html
|
16
|
-
node_to_html(self)
|
17
|
-
end
|
18
|
-
|
19
|
-
private
|
20
|
-
|
21
|
-
def node_to_html(node, cls=nil, depth=0)
|
22
|
-
el_cls = {} # map of node instances to class names for contained elements
|
23
|
-
terminal = node.terminal? # whether to look at children elements or not
|
24
|
-
|
25
|
-
if node.is_a?(FoodIngredientParser::Grammar::AmountNode)
|
26
|
-
cls ||= "amount"
|
27
|
-
elsif node.is_a?(FoodIngredientParser::Grammar::NoteNode)
|
28
|
-
cls ||= "note"
|
29
|
-
terminal = true # NoteNodes may contain other NoteNodes, we want it flat.
|
30
|
-
elsif node.is_a?(FoodIngredientParser::Grammar::IngredientNode)
|
31
|
-
el_cls[node.name] = "name" if node.respond_to?(:name)
|
32
|
-
el_cls[node.mark] = "mark" if node.respond_to?(:mark)
|
33
|
-
if node.respond_to?(:contains)
|
34
|
-
el_cls[node.contains] = "contains depth#{depth}"
|
35
|
-
depth += 1
|
36
|
-
end
|
37
|
-
elsif node.is_a?(FoodIngredientParser::Grammar::RootNode)
|
38
|
-
if node.respond_to?(:contains)
|
39
|
-
el_cls[node.contains] = "depth#{depth}"
|
40
|
-
depth += 1
|
41
|
-
end
|
42
|
-
end
|
43
|
-
|
44
|
-
val = if terminal
|
45
|
-
CGI.escapeHTML(node.text_value)
|
46
|
-
else
|
47
|
-
node.elements.map {|el| node_to_html(el, el_cls[el], depth) }.join("")
|
48
|
-
end
|
49
|
-
|
50
|
-
cls ? "<span class='#{cls}'>#{val}</span>" : val
|
51
|
-
end
|
52
|
-
end
|
File without changes
|