food_ingredient_parser 1.0.0.pre.5 → 1.0.0.pre.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +44 -8
- data/bin/food_ingredient_parser +13 -5
- data/lib/food_ingredient_parser/cleaner.rb +16 -0
- data/lib/food_ingredient_parser/loose/node.rb +60 -0
- data/lib/food_ingredient_parser/loose/parser.rb +24 -0
- data/lib/food_ingredient_parser/loose/scanner.rb +191 -0
- data/lib/food_ingredient_parser/loose/transform/amount.rb +70 -0
- data/lib/food_ingredient_parser/loose/transform/amount_from_name.treetop +13 -0
- data/lib/food_ingredient_parser/{grammar → strict/grammar}/amount.treetop +6 -5
- data/lib/food_ingredient_parser/{grammar → strict/grammar}/common.treetop +1 -1
- data/lib/food_ingredient_parser/{grammar → strict/grammar}/ingredient.treetop +1 -1
- data/lib/food_ingredient_parser/{grammar → strict/grammar}/ingredient_coloned.treetop +1 -1
- data/lib/food_ingredient_parser/{grammar → strict/grammar}/ingredient_nested.treetop +1 -1
- data/lib/food_ingredient_parser/{grammar → strict/grammar}/ingredient_simple.treetop +1 -1
- data/lib/food_ingredient_parser/{grammar → strict/grammar}/list.treetop +1 -1
- data/lib/food_ingredient_parser/{grammar → strict/grammar}/list_coloned.treetop +1 -1
- data/lib/food_ingredient_parser/{grammar → strict/grammar}/list_newlined.treetop +1 -1
- data/lib/food_ingredient_parser/{grammar → strict/grammar}/root.treetop +1 -1
- data/lib/food_ingredient_parser/strict/nodes.rb +74 -0
- data/lib/food_ingredient_parser/{parser.rb → strict/parser.rb} +3 -15
- data/lib/food_ingredient_parser/strict/to_html.rb +54 -0
- data/lib/food_ingredient_parser/version.rb +2 -2
- data/lib/food_ingredient_parser.rb +2 -1
- metadata +22 -16
- data/lib/food_ingredient_parser/nodes.rb +0 -72
- data/lib/food_ingredient_parser/to_html.rb +0 -52
- /data/lib/food_ingredient_parser/{grammar.rb → strict/grammar.rb} +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 54bdb9187f9a2dfbec67737ddc2a3ad90f4ca058
|
4
|
+
data.tar.gz: fcfc99674e0f58801ca3a375acebe91ba3f80c84
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 773526e862a74f04614486f3542de0c89f8663e10f959978a7d3a2e1ba8703e8a9ae93bde6b043a62326be68956733a30e6c01d85b6578dba0abc9064590fb18
|
7
|
+
data.tar.gz: e3296ae3222745f20727eed70bd1a9dac84a1714f1bdc64c2cd524bfbd6589a470afea33eab1e4ce717dbec9f4b3b3d8d9350025d13a4046e78cea3d0aea9b6d
|
data/README.md
CHANGED
@@ -22,11 +22,11 @@ require 'food_ingredient_parser'
|
|
22
22
|
s = "Water* 60%, suiker 30%, voedingszuren: citroenzuur, appelzuur, zuurteregelaar: E576/E577, " \
|
23
23
|
+ "natuurlijke citroen-limoen aroma's 0,2%, zoetstof: steviolglycosiden, * = Biologisch. " \
|
24
24
|
+ "E = door de E.U. goedgekeurde toevoeging."
|
25
|
-
parser = FoodIngredientParser::Parser.new
|
25
|
+
parser = FoodIngredientParser::Strict::Parser.new
|
26
26
|
puts parser.parse(s).to_h.inspect
|
27
27
|
```
|
28
28
|
Results in
|
29
|
-
```
|
29
|
+
```ruby
|
30
30
|
{
|
31
31
|
:contains=>[
|
32
32
|
{:name=>"Water", :amount=>"60%", :mark=>"*"},
|
@@ -58,14 +58,15 @@ running this from the source tree, use `bin/food_ingredient_parser` instead.
|
|
58
58
|
|
59
59
|
```
|
60
60
|
$ food_ingredient_parser -h
|
61
|
-
Usage: food_ingredient_parser [options] --file|-f <filename>
|
62
|
-
food_ingredient_parser [options] --string|-s <ingredients>
|
61
|
+
Usage: bin/food_ingredient_parser [options] --file|-f <filename>
|
62
|
+
bin/food_ingredient_parser [options] --string|-s <ingredients>
|
63
63
|
|
64
64
|
-f, --file FILE Parse all lines of the file as ingredient lists.
|
65
65
|
-s, --string INGREDIENTS Parse specified ingredient list.
|
66
66
|
-q, --[no-]quiet Only show summary.
|
67
67
|
-p, --parsed Only show lines that were successfully parsed.
|
68
|
-
-
|
68
|
+
-r, --parser PARSER Use specific parser (strict, loose).
|
69
|
+
-e, --[no-]escape Escape newlines
|
69
70
|
-c, --[no-]color Use color
|
70
71
|
-n, --noresult Only show lines that had no result.
|
71
72
|
-v, --[no-]verbose Show more data (parsed tree).
|
@@ -102,6 +103,12 @@ RootNode+Root3 offset=0, "tomato" (contains,notes):
|
|
102
103
|
SyntaxNode offset=6, ""
|
103
104
|
{:contains=>[{:name=>"tomato"}]}
|
104
105
|
|
106
|
+
$ food_ingredient_parser -v -r loose -s "tomato"
|
107
|
+
"tomato"
|
108
|
+
Node interval=0..5
|
109
|
+
Node interval=0..5, name="tomato"
|
110
|
+
{:contains=>[{:name=>"tomato"}]}
|
111
|
+
|
105
112
|
$ food_ingredient_parser -q -f data/test-cases
|
106
113
|
parsed 35 (100.0%), no result 0 (0.0%)
|
107
114
|
```
|
@@ -114,12 +121,12 @@ When ingredient lists are entered manually, it can be very useful to show how th
|
|
114
121
|
recognized. This can help understanding why a certain ingredients list cannot be parsed.
|
115
122
|
|
116
123
|
For this you can use the `to_html` method on the parsed output, which returns the original
|
117
|
-
text, augmented with CSS classes for different parts.
|
124
|
+
text, augmented with CSS classes for different parts. (Available for strict parser only.)
|
118
125
|
|
119
126
|
```ruby
|
120
127
|
require 'food_ingredient_parser'
|
121
128
|
|
122
|
-
parsed = FoodIngredientParser::Parser.new.parse("Saus (10% tomaat*, zout). * = bio")
|
129
|
+
parsed = FoodIngredientParser::Strict::Parser.new.parse("Saus (10% tomaat*, zout). * = bio")
|
123
130
|
puts parsed.to_html
|
124
131
|
```
|
125
132
|
|
@@ -138,9 +145,38 @@ For an example of an interactive editor, see [examples/editor.rb](examples/edito
|
|
138
145
|
|
139
146
|

|
140
147
|
|
148
|
+
## Loose parser
|
149
|
+
|
150
|
+
The strict parser only parses ingredient lists that conform to one of the many different
|
151
|
+
formats expected. If you'd like to return a result always, even if that is not necessarily
|
152
|
+
completely correct, you can use the _loose_ parser. This does not use Treetop, but looks
|
153
|
+
at the input character for character and tries to make the best of it. Nevertheless, if you
|
154
|
+
just want to have _some_ result, this can still be very useful.
|
155
|
+
|
156
|
+
```ruby
|
157
|
+
require 'food_ingredient_parser'
|
158
|
+
|
159
|
+
parsed = FoodIngredientParser::Loose::Parser.new.parse("Saus [10% tomaat*, (zout); peper.")
|
160
|
+
puts parsed.to_h
|
161
|
+
```
|
162
|
+
|
163
|
+
Even though the strict parser would not give a result, the loose parser returns:
|
164
|
+
```ruby
|
165
|
+
{
|
166
|
+
:contains=>[
|
167
|
+
{:name=>"Saus", :contains=>[
|
168
|
+
{:name=>"tomaat", :mark=>"*", :amount=>"10%"},
|
169
|
+
{:contains=>[{:name=>"zout"}]},
|
170
|
+
{:name=>"peper"}
|
171
|
+
]}
|
172
|
+
]
|
173
|
+
}
|
174
|
+
```
|
175
|
+
|
141
176
|
## Test data
|
142
177
|
|
143
178
|
[`data/ingredient-samples-nl`](data/ingredient-samples-nl) contains about 150k
|
144
179
|
real-world ingredient lists found on the Dutch market. Each line contains one ingredient
|
145
180
|
list (newlines are encoded as `\n`, empty lines and those starting with `#` are ignored).
|
146
|
-
|
181
|
+
The strict parser currently parses about three quarters, while the loose parser returns
|
182
|
+
something for all of them.
|
data/bin/food_ingredient_parser
CHANGED
@@ -31,8 +31,7 @@ def colorize(color, s)
|
|
31
31
|
end
|
32
32
|
end
|
33
33
|
|
34
|
-
def parse_single(s, parsed=nil, parser
|
35
|
-
parser ||= FoodIngredientParser::Parser.new
|
34
|
+
def parse_single(s, parsed=nil, parser:, verbosity: 1, print: nil, escape: false, color: false)
|
36
35
|
parsed ||= parser.parse(s)
|
37
36
|
|
38
37
|
return unless print.nil? || (parsed && print == :parsed) || (!parsed && print == :noresult)
|
@@ -47,7 +46,7 @@ def parse_single(s, parsed=nil, parser: nil, verbosity: 1, print: nil, escape: f
|
|
47
46
|
end
|
48
47
|
end
|
49
48
|
|
50
|
-
def parse_file(path, parser
|
49
|
+
def parse_file(path, parser:, verbosity: 1, print: nil, escape: false, color: false)
|
51
50
|
count_parsed = count_noresult = 0
|
52
51
|
File.foreach(path) do |line|
|
53
52
|
next if line =~ /^#/ # comment
|
@@ -70,8 +69,13 @@ verbosity = 1
|
|
70
69
|
files = []
|
71
70
|
strings = []
|
72
71
|
print = nil
|
72
|
+
parser_name = :strict
|
73
73
|
escape = false
|
74
74
|
color = true
|
75
|
+
PARSERS = {
|
76
|
+
strict: FoodIngredientParser::Strict::Parser,
|
77
|
+
loose: FoodIngredientParser::Loose::Parser
|
78
|
+
}
|
75
79
|
OptionParser.new do |opts|
|
76
80
|
opts.banner = <<-EOF.gsub(/^ /, '')
|
77
81
|
Usage: #{$0} [options] --file|-f <filename>
|
@@ -84,7 +88,8 @@ OptionParser.new do |opts|
|
|
84
88
|
|
85
89
|
opts.on("-q", "--[no-]quiet", "Only show summary.") {|q| verbosity = q ? 0 : 1 }
|
86
90
|
opts.on("-p", "--parsed", "Only show lines that were successfully parsed.") {|p| print = :parsed }
|
87
|
-
opts.on("-
|
91
|
+
opts.on("-r", "--parser PARSER", "Use specific parser (#{PARSERS.keys.join(", ")}).") {|p| parser_name = p&.downcase&.to_sym }
|
92
|
+
opts.on("-e", "--[no-]escape", "Escape newlines") {|e| escape = !!e }
|
88
93
|
opts.on("-c", "--[no-]color", "Use color") {|e| color = !!e }
|
89
94
|
opts.on("-n", "--noresult", "Only show lines that had no result.") {|p| print = :noresult }
|
90
95
|
opts.on("-v", "--[no-]verbose", "Show more data (parsed tree).") {|v| verbosity = v ? 2 : 1 }
|
@@ -99,7 +104,10 @@ OptionParser.new do |opts|
|
|
99
104
|
end.parse!
|
100
105
|
|
101
106
|
if strings.any? || files.any?
|
102
|
-
parser =
|
107
|
+
unless parser = PARSERS[parser_name]&.new
|
108
|
+
STDERR.puts("Please specify one of the known parsers: #{PARSERS.keys.join(", ")}.")
|
109
|
+
exit(1)
|
110
|
+
end
|
103
111
|
strings.each {|s| parse_single(s, parser: parser, verbosity: verbosity, print: print, escape: escape, color: color) }
|
104
112
|
files.each {|f| parse_file(f, parser: parser, verbosity: verbosity, print: print, escape: escape, color: color) }
|
105
113
|
else
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module FoodIngredientParser
|
2
|
+
module Cleaner
|
3
|
+
|
4
|
+
def self.clean(s)
|
5
|
+
s.gsub!("\u00ad", "") # strip soft hyphen
|
6
|
+
s.gsub!("\u0092", "'") # windows-1252 apostrophe - https://stackoverflow.com/a/15564279/2866660
|
7
|
+
s.gsub!("aÄs", "aïs") # encoding issue for maïs
|
8
|
+
s.gsub!("ï", "ï") # encoding issue
|
9
|
+
s.gsub!("ë", "ë") # encoding issue
|
10
|
+
s.gsub!(/\A\s*"(.*)"\s*\z/, '\1') # enclosing double quotation marks
|
11
|
+
s.gsub!(/\A\s*'(.*)'\s*\z/, '\1') # enclosing single quotation marks
|
12
|
+
s
|
13
|
+
end
|
14
|
+
|
15
|
+
end
|
16
|
+
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
module FoodIngredientParser::Loose
|
2
|
+
# Parsing result.
|
3
|
+
class Node
|
4
|
+
attr_accessor :name, :mark, :amount, :contains, :notes
|
5
|
+
attr_reader :input, :interval, :auto_close
|
6
|
+
|
7
|
+
def initialize(input, interval, auto_close: false)
|
8
|
+
@input = input
|
9
|
+
@interval = interval.is_a?(Range) ? interval : ( interval .. interval )
|
10
|
+
@auto_close = auto_close
|
11
|
+
@contains = []
|
12
|
+
@notes = []
|
13
|
+
@name = @mark = @amount = nil
|
14
|
+
end
|
15
|
+
|
16
|
+
def ends(index)
|
17
|
+
@interval = @interval.first .. index
|
18
|
+
end
|
19
|
+
|
20
|
+
def <<(child)
|
21
|
+
@contains << child
|
22
|
+
end
|
23
|
+
|
24
|
+
def text_value
|
25
|
+
@input[@interval]
|
26
|
+
end
|
27
|
+
|
28
|
+
def to_h
|
29
|
+
r = {}
|
30
|
+
r[:name] = name.text_value.strip if name && name.text_value.strip != ''
|
31
|
+
r[:mark] = mark.text_value.strip if mark
|
32
|
+
r[:amount] = amount.text_value.strip if amount
|
33
|
+
r[:contains] = contains.map(&:to_h).reject {|c| c == {} } if contains.any?
|
34
|
+
r[:notes] = notes.map{|n| n.text_value.strip }.reject {|c| c == '' } if notes.any?
|
35
|
+
r
|
36
|
+
end
|
37
|
+
|
38
|
+
def inspect(indent="", variant="")
|
39
|
+
inspect_self(indent, variant) +
|
40
|
+
inspect_children(indent)
|
41
|
+
end
|
42
|
+
|
43
|
+
def inspect_self(indent="", variant="")
|
44
|
+
[
|
45
|
+
indent + "Node#{variant} interval=#{@interval}",
|
46
|
+
name ? "name=#{name.text_value.strip.inspect}" : nil,
|
47
|
+
mark ? "mark=#{mark.text_value.strip.inspect}" : nil,
|
48
|
+
amount ? "amount=#{amount.text_value.strip.inspect}" : nil,
|
49
|
+
auto_close ? "auto_close" : nil
|
50
|
+
].compact.join(", ")
|
51
|
+
end
|
52
|
+
|
53
|
+
def inspect_children(indent="")
|
54
|
+
[
|
55
|
+
*contains.map {|child| "\n" + child.inspect(indent + " ") },
|
56
|
+
*notes.map {|note| "\n" + note.inspect(indent + " ", "(note)") }
|
57
|
+
].join("")
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require_relative '../cleaner'
|
2
|
+
require_relative 'scanner'
|
3
|
+
require_relative 'transform/amount'
|
4
|
+
|
5
|
+
module FoodIngredientParser::Loose
|
6
|
+
class Parser
|
7
|
+
|
8
|
+
# Create a new food ingredient stream parser
|
9
|
+
# @return [FoodIngredientParser::Loose::Parser]
|
10
|
+
def initialize
|
11
|
+
end
|
12
|
+
|
13
|
+
# Parse food ingredient list text into a structured representation.
|
14
|
+
#
|
15
|
+
# @option clean [Boolean] pass +false+ to disable correcting frequently occurring issues
|
16
|
+
# @return [FoodIngredientParser::Loose::Node] structured representation of food ingredients
|
17
|
+
def parse(s, clean: true, **options)
|
18
|
+
s = FoodIngredientParser::Cleaner.clean(s) if clean
|
19
|
+
n = Scanner.new(s).scan
|
20
|
+
n = Transform::Amount.transform!(n) if n
|
21
|
+
n
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
@@ -0,0 +1,191 @@
|
|
1
|
+
require_relative 'node'
|
2
|
+
|
3
|
+
module FoodIngredientParser::Loose
|
4
|
+
class Scanner
|
5
|
+
|
6
|
+
SEP_CHARS = "|;,.".freeze
|
7
|
+
MARK_CHARS = "¹²³⁴⁵ᵃᵇᶜᵈᵉᶠᵍªº⁽⁾†‡•°#^*".freeze
|
8
|
+
PREFIX_RE = /\A\s*(ingredients|contains|ingred[iï][eë]nt(en)?(declaratie)?|bevat|dit zit er\s?in|samenstelling|zutaten)\s*[:;.]\s*/i.freeze
|
9
|
+
NOTE_RE = /\A\b(dit product kan\b|kan sporen\b.*?\bbevatten\b|voor allergenen\b|allergenen\b|E\s*=|gemaakt in\b|geproduceerd in\b|bevat mogelijk\b|kijk voor meer\b|allergie-info|in de fabriek\b|in dit bedrijf\b)/i.freeze
|
10
|
+
|
11
|
+
def initialize(s, index: 0)
|
12
|
+
@s = s # input string
|
13
|
+
@i = index # current index in string
|
14
|
+
@cur = nil # current node we're populating
|
15
|
+
@ancestors = [Node.new(@s, @i)] # nesting hierarchy
|
16
|
+
@iterator = :beginning # scan_iteration_<iterator> to use for parsing
|
17
|
+
@dest = :contains # append current node to this attribute on parent
|
18
|
+
end
|
19
|
+
|
20
|
+
def scan
|
21
|
+
loop do
|
22
|
+
method(:"scan_iteration_#{@iterator}").call
|
23
|
+
end
|
24
|
+
|
25
|
+
close_all_ancestors
|
26
|
+
@ancestors.first.ends(@i-1)
|
27
|
+
@ancestors.first
|
28
|
+
end
|
29
|
+
|
30
|
+
private
|
31
|
+
|
32
|
+
def loop
|
33
|
+
while @i < @s.length
|
34
|
+
@i += 1 if yield != false
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
def scan_iteration_beginning
|
39
|
+
# skip over some common prefixes
|
40
|
+
m = @s[@i .. -1].match(PREFIX_RE)
|
41
|
+
@i += m.offset(0).last if m
|
42
|
+
# now continue with the standard parsing
|
43
|
+
@iterator = :standard
|
44
|
+
false
|
45
|
+
end
|
46
|
+
|
47
|
+
def scan_iteration_standard
|
48
|
+
if "([".include?(c) # open nesting
|
49
|
+
open_parent
|
50
|
+
elsif ")]".include?(c) # close nesting
|
51
|
+
add_child
|
52
|
+
close_parent
|
53
|
+
elsif is_notes_start? # usually a dot marks the start of notes
|
54
|
+
close_all_ancestors
|
55
|
+
@iterator = :notes
|
56
|
+
@dest = :notes
|
57
|
+
elsif is_sep? # separator
|
58
|
+
add_child
|
59
|
+
elsif ":".include?(c) # another open nesting
|
60
|
+
add_child
|
61
|
+
open_parent(auto_close: true)
|
62
|
+
@iterator = :colon
|
63
|
+
elsif is_mark? && !cur.mark # mark after ingredient
|
64
|
+
name_until_here
|
65
|
+
len = mark_len
|
66
|
+
cur.mark = Node.new(@s, @i .. @i+len-1)
|
67
|
+
@i += len - 1
|
68
|
+
else
|
69
|
+
cur # reference to record starting position
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
def scan_iteration_colon
|
74
|
+
if "/".include?(c) # slash separator in colon nesting only
|
75
|
+
add_child
|
76
|
+
elsif is_sep? # regular separator indicates end of colon nesting
|
77
|
+
add_child
|
78
|
+
close_parent
|
79
|
+
# revert to standard parsing from here on
|
80
|
+
@iterator = :standard
|
81
|
+
scan_iteration_standard
|
82
|
+
elsif "([]):".include?(c) # continue with deeper nesting level
|
83
|
+
# revert to standard parsing from here on
|
84
|
+
@iterator = :standard
|
85
|
+
scan_iteration_standard
|
86
|
+
else
|
87
|
+
# normal handling for this character
|
88
|
+
scan_iteration_standard
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
def scan_iteration_notes
|
93
|
+
if is_sep?(chars: ".") # dot means new note
|
94
|
+
add_child
|
95
|
+
else
|
96
|
+
cur
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
100
|
+
def c
|
101
|
+
@s[@i]
|
102
|
+
end
|
103
|
+
|
104
|
+
def parent
|
105
|
+
@ancestors.last
|
106
|
+
end
|
107
|
+
|
108
|
+
def cur
|
109
|
+
@cur ||= Node.new(@s, @i)
|
110
|
+
end
|
111
|
+
|
112
|
+
def is_mark?
|
113
|
+
mark_len > 0 && @s[@i..@i+1] !~ /\A°[CF]/
|
114
|
+
end
|
115
|
+
|
116
|
+
def is_sep?(chars: SEP_CHARS)
|
117
|
+
chars.include?(c) && @s[@i-1..@i+1] !~ /\A\d.\d\z/
|
118
|
+
end
|
119
|
+
|
120
|
+
def mark_len
|
121
|
+
i = @i
|
122
|
+
while @s[i] && MARK_CHARS.include?(@s[i])
|
123
|
+
i += 1
|
124
|
+
end
|
125
|
+
i - @i
|
126
|
+
end
|
127
|
+
|
128
|
+
def is_notes_start?
|
129
|
+
# @todo use more heuristics: don't assume dot is notes when separator is a dot, and only toplevel?
|
130
|
+
if ( is_mark? && @s[@i+mark_len..-1] =~ /\A\s*=/ ) || # "* = Biologisch"
|
131
|
+
( is_mark? && @s[@i-2..@i-1] =~ /\A\s\s/ ) || # " **Biologisch"
|
132
|
+
( @s[@i..-1] =~ NOTE_RE ) # "E=", "Kan sporen van", ...
|
133
|
+
@i -= 1 # we want to include the mark in the note
|
134
|
+
true
|
135
|
+
# End of sentence
|
136
|
+
elsif dot_is_not_sep? && is_sep?(chars: ".")
|
137
|
+
true
|
138
|
+
else
|
139
|
+
false
|
140
|
+
end
|
141
|
+
end
|
142
|
+
|
143
|
+
def add_child
|
144
|
+
cur.ends(@i-1)
|
145
|
+
cur.name ||= Node.new(@s, cur.interval)
|
146
|
+
parent.send(@dest) << cur
|
147
|
+
@cur = nil
|
148
|
+
end
|
149
|
+
|
150
|
+
def open_parent(**options)
|
151
|
+
name_until_here
|
152
|
+
@ancestors << cur
|
153
|
+
@cur = Node.new(@s, @i + 1, **options)
|
154
|
+
end
|
155
|
+
|
156
|
+
def close_parent
|
157
|
+
return unless @ancestors.count > 1
|
158
|
+
@cur = @ancestors.pop
|
159
|
+
while @cur.auto_close
|
160
|
+
add_child
|
161
|
+
@cur = @ancestors.pop
|
162
|
+
end
|
163
|
+
end
|
164
|
+
|
165
|
+
def close_all_ancestors
|
166
|
+
while @ancestors.count > 1
|
167
|
+
add_child
|
168
|
+
close_parent
|
169
|
+
end
|
170
|
+
add_child
|
171
|
+
end
|
172
|
+
|
173
|
+
def name_until_here
|
174
|
+
cur.name ||= Node.new(@s, cur.interval.first .. @i-1)
|
175
|
+
end
|
176
|
+
|
177
|
+
def dot_is_not_sep?
|
178
|
+
# if separator is dot ".", don't use it for note detection
|
179
|
+
if @dot_is_not_sep.nil?
|
180
|
+
@dot_is_not_sep = begin
|
181
|
+
# @todo if another separator is found more often, dot is not a separator
|
182
|
+
num_words = @s.split(/\s+/).count
|
183
|
+
num_dots = @s.count(".")
|
184
|
+
# heuristic: dot counts as a separator only with at least five words, of which 1/4+ have a dot
|
185
|
+
num_words < 5 || 4 * num_dots < num_words
|
186
|
+
end
|
187
|
+
end
|
188
|
+
@dot_is_not_sep
|
189
|
+
end
|
190
|
+
end
|
191
|
+
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
require 'treetop'
|
2
|
+
require_relative '../../strict/nodes'
|
3
|
+
Treetop.load File.dirname(__FILE__) + '/../../strict/grammar/common'
|
4
|
+
Treetop.load File.dirname(__FILE__) + '/../../strict/grammar/amount'
|
5
|
+
Treetop.load File.dirname(__FILE__) + '/amount_from_name'
|
6
|
+
|
7
|
+
require_relative '../node'
|
8
|
+
|
9
|
+
module FoodIngredientParser::Loose
|
10
|
+
module Transform
|
11
|
+
# Transforms node tree to extract amount into its own attribute.
|
12
|
+
class Amount
|
13
|
+
def self.transform!(node)
|
14
|
+
new(node).transform!
|
15
|
+
end
|
16
|
+
|
17
|
+
def initialize(node)
|
18
|
+
@node = node
|
19
|
+
@parser = FoodIngredientParser::Loose::Transform::AmountFromNameParser.new
|
20
|
+
end
|
21
|
+
|
22
|
+
def transform!
|
23
|
+
transform_name
|
24
|
+
transform_contains
|
25
|
+
@node
|
26
|
+
end
|
27
|
+
|
28
|
+
private
|
29
|
+
|
30
|
+
# Extract amount from name, if any.
|
31
|
+
def transform_name(node = @node)
|
32
|
+
if !node.amount && parsed = parse_amount(node.name&.text_value)
|
33
|
+
offset = node.name.interval.first
|
34
|
+
|
35
|
+
amount = parsed.amount.amount
|
36
|
+
node.amount = Node.new(node.input, offset + amount.interval.first .. offset + amount.interval.last - 1)
|
37
|
+
|
38
|
+
name = parsed.respond_to?(:name) && parsed.name
|
39
|
+
if name && name.interval.count > 0
|
40
|
+
node.name = Node.new(node.input, offset + name.interval.first .. offset + name.interval.last - 1)
|
41
|
+
else
|
42
|
+
node.name = nil
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
# recursively transform contained nodes
|
47
|
+
node.contains&.each(&method(:transform_name))
|
48
|
+
end
|
49
|
+
|
50
|
+
# If first or last child is an amount, it's this node's amount.
|
51
|
+
# Assumes amounts have already been extracted from all names by {#transform_name}.
|
52
|
+
def transform_contains(node = @node)
|
53
|
+
if !node.amount && node.contains.any?
|
54
|
+
if node.contains.first.name.nil? && node.contains.first.amount
|
55
|
+
node.amount = node.contains.shift.amount
|
56
|
+
elsif node.contains.count > 1 && node.contains.last.name.nil? && node.contains.last.amount
|
57
|
+
node.amount = node.contains.pop.amount
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
# recursively transform contained nodes
|
62
|
+
node.contains.each(&method(:transform_contains))
|
63
|
+
end
|
64
|
+
|
65
|
+
def parse_amount(s)
|
66
|
+
@parser.parse(s) if s && s.strip != ''
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
module FoodIngredientParser::Loose::Transform
|
2
|
+
grammar AmountFromName
|
3
|
+
include FoodIngredientParser::Strict::Grammar::Common
|
4
|
+
include FoodIngredientParser::Strict::Grammar::Amount
|
5
|
+
|
6
|
+
rule amount_from_name
|
7
|
+
# just amount, amount in front or at the end
|
8
|
+
ws* amount:amount ws+ name:(.*) /
|
9
|
+
ws* amount:amount ws* /
|
10
|
+
ws* name:( !amount word ( ws+ !amount word )* )+ ws* amount:amount ws*
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
@@ -1,4 +1,4 @@
|
|
1
|
-
module FoodIngredientParser::Grammar
|
1
|
+
module FoodIngredientParser::Strict::Grammar
|
2
2
|
grammar Amount
|
3
3
|
include Common
|
4
4
|
|
@@ -12,18 +12,19 @@ module FoodIngredientParser::Grammar
|
|
12
12
|
rule simple_amount
|
13
13
|
( (
|
14
14
|
'of which'i / 'at least'i / 'minimal'i / 'maximal'i / 'less than'i / 'more than'i /
|
15
|
-
'waarvan'i / 'ten minste'i / 'tenminste'i / 'minimaal'i / 'maximaal'i / 'minder dan'i / 'meer dan'i
|
15
|
+
'waarvan'i / 'ten minste'i / 'tenminste'i / 'minimaal'i / 'maximaal'i / 'minder dan'i / 'meer dan'i /
|
16
|
+
'min.'i / 'min'i / 'max.'i / 'max'i
|
16
17
|
) ws* )?
|
17
18
|
[±∓~∼∽≂≃≈≲≤<>≥≳]? ws*
|
18
19
|
simple_amount_quantity
|
19
20
|
( ws+ (
|
20
|
-
'minimum'i /
|
21
|
-
'
|
21
|
+
'minimaal'i / 'minimum'i / 'van het uitlekgewicht'i / 'van het geheel'i /
|
22
|
+
'min.'i / 'min'i / 'max.'i / 'max'i
|
22
23
|
) )?
|
23
24
|
end
|
24
25
|
|
25
26
|
rule simple_amount_quantity
|
26
|
-
number ( ws* '-' ws* number )? ws* ( '
|
27
|
+
number ( ws* '-' ws* number )? ws* ( [%٪⁒%﹪] / ( ( 'procent' / 'percent' / 'gram'i / 'ml'i / 'mg'i / 'g'i ) !char ) )
|
27
28
|
end
|
28
29
|
|
29
30
|
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
require 'treetop/runtime'
|
2
|
+
require_relative 'to_html'
|
3
|
+
|
4
|
+
# Needs to be in grammar namespace so Treetop can find the nodes.
|
5
|
+
module FoodIngredientParser::Strict
|
6
|
+
module Grammar
|
7
|
+
|
8
|
+
# Treetop syntax node with our additions, use this as parent for all our own nodes.
|
9
|
+
class SyntaxNode < Treetop::Runtime::SyntaxNode
|
10
|
+
private
|
11
|
+
|
12
|
+
def to_a_deep(n, cls)
|
13
|
+
if n.is_a?(cls)
|
14
|
+
[n]
|
15
|
+
elsif n.nonterminal?
|
16
|
+
n.elements.map {|m| to_a_deep(m, cls) }.flatten(1).compact
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
# Root object, contains everything else.
|
22
|
+
class RootNode < SyntaxNode
|
23
|
+
include FoodIngredientParser::Strict::ToHtml
|
24
|
+
|
25
|
+
def to_h
|
26
|
+
h = { contains: contains.to_a }
|
27
|
+
if notes && notes_ary = to_a_deep(notes, NoteNode)&.map(&:text_value)
|
28
|
+
h[:notes] = notes_ary if notes_ary.length > 0
|
29
|
+
end
|
30
|
+
h
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
# List of ingredients.
|
35
|
+
class ListNode < SyntaxNode
|
36
|
+
def to_a
|
37
|
+
to_a_deep(contains, IngredientNode).map(&:to_h)
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
# Ingredient
|
42
|
+
class IngredientNode < SyntaxNode
|
43
|
+
def to_h
|
44
|
+
h = {}
|
45
|
+
h.merge!(to_a_deep(ing, IngredientNode)&.first&.to_h || {}) if respond_to?(:ing)
|
46
|
+
h.merge!(to_a_deep(amount, AmountNode)&.first&.to_h || {}) if respond_to?(:amount)
|
47
|
+
h[:name] = name.text_value if respond_to?(:name)
|
48
|
+
h[:name] = pre.text_value + h[:name] if respond_to?(:pre)
|
49
|
+
h[:name] = h[:name] + post.text_value if respond_to?(:post)
|
50
|
+
h[:mark] = mark.text_value if respond_to?(:mark) && mark.text_value != ''
|
51
|
+
h
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
# Ingredient with containing ingredients.
|
56
|
+
class NestedIngredientNode < IngredientNode
|
57
|
+
def to_h
|
58
|
+
super.merge({ contains: to_a_deep(contains, IngredientNode).map(&:to_h) })
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
# Amount, specifying an ingredient.
|
63
|
+
class AmountNode < SyntaxNode
|
64
|
+
def to_h
|
65
|
+
{ amount: amount.text_value }
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
# Note at the end of the ingredient list.
|
70
|
+
class NoteNode < SyntaxNode
|
71
|
+
end
|
72
|
+
|
73
|
+
end
|
74
|
+
end
|
@@ -1,6 +1,7 @@
|
|
1
1
|
require_relative 'grammar'
|
2
|
+
require_relative '../cleaner'
|
2
3
|
|
3
|
-
module FoodIngredientParser
|
4
|
+
module FoodIngredientParser::Strict
|
4
5
|
class Parser
|
5
6
|
|
6
7
|
# @!attribute [r] parser
|
@@ -20,22 +21,9 @@ module FoodIngredientParser
|
|
20
21
|
# @return [FoodIngredientParser::Strict::Grammar::RootNode] structured representation of food ingredients
|
21
22
|
# @note Unrecognized options are passed to Treetop, but this is not guaranteed to remain so forever.
|
22
23
|
def parse(s, clean: true, **options)
|
23
|
-
s = clean(s) if clean
|
24
|
+
s = FoodIngredientParser::Cleaner.clean(s) if clean
|
24
25
|
@parser.parse(s, **options)
|
25
26
|
end
|
26
27
|
|
27
|
-
private
|
28
|
-
|
29
|
-
def clean(s)
|
30
|
-
s.gsub!("\u00ad", "") # strip soft hyphen
|
31
|
-
s.gsub!("\u0092", "'") # windows-1252 apostrophe - https://stackoverflow.com/a/15564279/2866660
|
32
|
-
s.gsub!("aÄs", "aïs") # encoding issue for maïs
|
33
|
-
s.gsub!("ï", "ï") # encoding issue
|
34
|
-
s.gsub!("ë", "ë") # encoding issue
|
35
|
-
s.gsub!(/\A\s*"(.*)"\s*\z/, '\1') # enclosing double quotation marks
|
36
|
-
s.gsub!(/\A\s*'(.*)'\s*\z/, '\1') # enclosing single quotation marks
|
37
|
-
s
|
38
|
-
end
|
39
|
-
|
40
28
|
end
|
41
29
|
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'cgi'
|
2
|
+
|
3
|
+
# Adds HTML output functionality to a Treetop Node.
|
4
|
+
#
|
5
|
+
# The node needs to provide a {#to_h} method (for {#to_html_h}).
|
6
|
+
#
|
7
|
+
module FoodIngredientParser::Strict
|
8
|
+
module ToHtml
|
9
|
+
|
10
|
+
# Markup original ingredients list text in HTML.
|
11
|
+
#
|
12
|
+
# The input text is returned as HTML, augmented with CSS classes
|
13
|
+
# on +span+s for +name+, +amount+, +mark+ and +note+.
|
14
|
+
#
|
15
|
+
# @return [String] HTML representation of ingredient list.
|
16
|
+
def to_html
|
17
|
+
node_to_html(self)
|
18
|
+
end
|
19
|
+
|
20
|
+
private
|
21
|
+
|
22
|
+
def node_to_html(node, cls=nil, depth=0)
|
23
|
+
el_cls = {} # map of node instances to class names for contained elements
|
24
|
+
terminal = node.terminal? # whether to look at children elements or not
|
25
|
+
|
26
|
+
if node.is_a?(FoodIngredientParser::Strict::Grammar::AmountNode)
|
27
|
+
cls ||= "amount"
|
28
|
+
elsif node.is_a?(FoodIngredientParser::Strict::Grammar::NoteNode)
|
29
|
+
cls ||= "note"
|
30
|
+
terminal = true # NoteNodes may contain other NoteNodes, we want it flat.
|
31
|
+
elsif node.is_a?(FoodIngredientParser::Strict::Grammar::IngredientNode)
|
32
|
+
el_cls[node.name] = "name" if node.respond_to?(:name)
|
33
|
+
el_cls[node.mark] = "mark" if node.respond_to?(:mark)
|
34
|
+
if node.respond_to?(:contains)
|
35
|
+
el_cls[node.contains] = "contains depth#{depth}"
|
36
|
+
depth += 1
|
37
|
+
end
|
38
|
+
elsif node.is_a?(FoodIngredientParser::Strict::Grammar::RootNode)
|
39
|
+
if node.respond_to?(:contains)
|
40
|
+
el_cls[node.contains] = "depth#{depth}"
|
41
|
+
depth += 1
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
val = if terminal
|
46
|
+
CGI.escapeHTML(node.text_value)
|
47
|
+
else
|
48
|
+
node.elements.map {|el| node_to_html(el, el_cls[el], depth) }.join("")
|
49
|
+
end
|
50
|
+
|
51
|
+
cls ? "<span class='#{cls}'>#{val}</span>" : val
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: food_ingredient_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0.pre.
|
4
|
+
version: 1.0.0.pre.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- wvengen
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-09-
|
11
|
+
date: 2018-09-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: treetop
|
@@ -42,20 +42,26 @@ files:
|
|
42
42
|
- bin/food_ingredient_parser
|
43
43
|
- food_ingredient_parser.gemspec
|
44
44
|
- lib/food_ingredient_parser.rb
|
45
|
-
- lib/food_ingredient_parser/
|
46
|
-
- lib/food_ingredient_parser/
|
47
|
-
- lib/food_ingredient_parser/
|
48
|
-
- lib/food_ingredient_parser/
|
49
|
-
- lib/food_ingredient_parser/
|
50
|
-
- lib/food_ingredient_parser/
|
51
|
-
- lib/food_ingredient_parser/grammar
|
52
|
-
- lib/food_ingredient_parser/grammar/
|
53
|
-
- lib/food_ingredient_parser/grammar/
|
54
|
-
- lib/food_ingredient_parser/grammar/
|
55
|
-
- lib/food_ingredient_parser/grammar/
|
56
|
-
- lib/food_ingredient_parser/
|
57
|
-
- lib/food_ingredient_parser/
|
58
|
-
- lib/food_ingredient_parser/
|
45
|
+
- lib/food_ingredient_parser/cleaner.rb
|
46
|
+
- lib/food_ingredient_parser/loose/node.rb
|
47
|
+
- lib/food_ingredient_parser/loose/parser.rb
|
48
|
+
- lib/food_ingredient_parser/loose/scanner.rb
|
49
|
+
- lib/food_ingredient_parser/loose/transform/amount.rb
|
50
|
+
- lib/food_ingredient_parser/loose/transform/amount_from_name.treetop
|
51
|
+
- lib/food_ingredient_parser/strict/grammar.rb
|
52
|
+
- lib/food_ingredient_parser/strict/grammar/amount.treetop
|
53
|
+
- lib/food_ingredient_parser/strict/grammar/common.treetop
|
54
|
+
- lib/food_ingredient_parser/strict/grammar/ingredient.treetop
|
55
|
+
- lib/food_ingredient_parser/strict/grammar/ingredient_coloned.treetop
|
56
|
+
- lib/food_ingredient_parser/strict/grammar/ingredient_nested.treetop
|
57
|
+
- lib/food_ingredient_parser/strict/grammar/ingredient_simple.treetop
|
58
|
+
- lib/food_ingredient_parser/strict/grammar/list.treetop
|
59
|
+
- lib/food_ingredient_parser/strict/grammar/list_coloned.treetop
|
60
|
+
- lib/food_ingredient_parser/strict/grammar/list_newlined.treetop
|
61
|
+
- lib/food_ingredient_parser/strict/grammar/root.treetop
|
62
|
+
- lib/food_ingredient_parser/strict/nodes.rb
|
63
|
+
- lib/food_ingredient_parser/strict/parser.rb
|
64
|
+
- lib/food_ingredient_parser/strict/to_html.rb
|
59
65
|
- lib/food_ingredient_parser/version.rb
|
60
66
|
homepage: https://github.com/q-m/food-ingredient-parser-ruby
|
61
67
|
licenses:
|
@@ -1,72 +0,0 @@
|
|
1
|
-
require 'treetop/runtime'
|
2
|
-
require_relative 'to_html'
|
3
|
-
|
4
|
-
# Needs to be in grammar namespace so Treetop can find the nodes.
|
5
|
-
module FoodIngredientParser::Grammar
|
6
|
-
|
7
|
-
# Treetop syntax node with our additions, use this as parent for all our own nodes.
|
8
|
-
class SyntaxNode < Treetop::Runtime::SyntaxNode
|
9
|
-
private
|
10
|
-
|
11
|
-
def to_a_deep(n, cls)
|
12
|
-
if n.is_a?(cls)
|
13
|
-
[n]
|
14
|
-
elsif n.nonterminal?
|
15
|
-
n.elements.map {|m| to_a_deep(m, cls) }.flatten(1).compact
|
16
|
-
end
|
17
|
-
end
|
18
|
-
end
|
19
|
-
|
20
|
-
# Root object, contains everything else.
|
21
|
-
class RootNode < SyntaxNode
|
22
|
-
include FoodIngredientParser::ToHtml
|
23
|
-
|
24
|
-
def to_h
|
25
|
-
h = { contains: contains.to_a }
|
26
|
-
if notes && notes_ary = to_a_deep(notes, NoteNode)&.map(&:text_value)
|
27
|
-
h[:notes] = notes_ary if notes_ary.length > 0
|
28
|
-
end
|
29
|
-
h
|
30
|
-
end
|
31
|
-
end
|
32
|
-
|
33
|
-
# List of ingredients.
|
34
|
-
class ListNode < SyntaxNode
|
35
|
-
def to_a
|
36
|
-
to_a_deep(contains, IngredientNode).map(&:to_h)
|
37
|
-
end
|
38
|
-
end
|
39
|
-
|
40
|
-
# Ingredient
|
41
|
-
class IngredientNode < SyntaxNode
|
42
|
-
def to_h
|
43
|
-
h = {}
|
44
|
-
h.merge!(to_a_deep(ing, IngredientNode)&.first&.to_h || {}) if respond_to?(:ing)
|
45
|
-
h.merge!(to_a_deep(amount, AmountNode)&.first&.to_h || {}) if respond_to?(:amount)
|
46
|
-
h[:name] = name.text_value if respond_to?(:name)
|
47
|
-
h[:name] = pre.text_value + h[:name] if respond_to?(:pre)
|
48
|
-
h[:name] = h[:name] + post.text_value if respond_to?(:post)
|
49
|
-
h[:mark] = mark.text_value if respond_to?(:mark) && mark.text_value != ''
|
50
|
-
h
|
51
|
-
end
|
52
|
-
end
|
53
|
-
|
54
|
-
# Ingredient with containing ingredients.
|
55
|
-
class NestedIngredientNode < IngredientNode
|
56
|
-
def to_h
|
57
|
-
super.merge({ contains: to_a_deep(contains, IngredientNode).map(&:to_h) })
|
58
|
-
end
|
59
|
-
end
|
60
|
-
|
61
|
-
# Amount, specifying an ingredient.
|
62
|
-
class AmountNode < SyntaxNode
|
63
|
-
def to_h
|
64
|
-
{ amount: amount.text_value }
|
65
|
-
end
|
66
|
-
end
|
67
|
-
|
68
|
-
# Note at the end of the ingredient list.
|
69
|
-
class NoteNode < SyntaxNode
|
70
|
-
end
|
71
|
-
|
72
|
-
end
|
@@ -1,52 +0,0 @@
|
|
1
|
-
require 'cgi'
|
2
|
-
|
3
|
-
# Adds HTML output functionality to a Treetop Node.
|
4
|
-
#
|
5
|
-
# The node needs to provide a {#to_h} method (for {#to_html_h}).
|
6
|
-
#
|
7
|
-
module FoodIngredientParser::ToHtml
|
8
|
-
|
9
|
-
# Markup original ingredients list text in HTML.
|
10
|
-
#
|
11
|
-
# The input text is returned as HTML, augmented with CSS classes
|
12
|
-
# on +span+s for +name+, +amount+, +mark+ and +note+.
|
13
|
-
#
|
14
|
-
# @return [String] HTML representation of ingredient list.
|
15
|
-
def to_html
|
16
|
-
node_to_html(self)
|
17
|
-
end
|
18
|
-
|
19
|
-
private
|
20
|
-
|
21
|
-
def node_to_html(node, cls=nil, depth=0)
|
22
|
-
el_cls = {} # map of node instances to class names for contained elements
|
23
|
-
terminal = node.terminal? # whether to look at children elements or not
|
24
|
-
|
25
|
-
if node.is_a?(FoodIngredientParser::Grammar::AmountNode)
|
26
|
-
cls ||= "amount"
|
27
|
-
elsif node.is_a?(FoodIngredientParser::Grammar::NoteNode)
|
28
|
-
cls ||= "note"
|
29
|
-
terminal = true # NoteNodes may contain other NoteNodes, we want it flat.
|
30
|
-
elsif node.is_a?(FoodIngredientParser::Grammar::IngredientNode)
|
31
|
-
el_cls[node.name] = "name" if node.respond_to?(:name)
|
32
|
-
el_cls[node.mark] = "mark" if node.respond_to?(:mark)
|
33
|
-
if node.respond_to?(:contains)
|
34
|
-
el_cls[node.contains] = "contains depth#{depth}"
|
35
|
-
depth += 1
|
36
|
-
end
|
37
|
-
elsif node.is_a?(FoodIngredientParser::Grammar::RootNode)
|
38
|
-
if node.respond_to?(:contains)
|
39
|
-
el_cls[node.contains] = "depth#{depth}"
|
40
|
-
depth += 1
|
41
|
-
end
|
42
|
-
end
|
43
|
-
|
44
|
-
val = if terminal
|
45
|
-
CGI.escapeHTML(node.text_value)
|
46
|
-
else
|
47
|
-
node.elements.map {|el| node_to_html(el, el_cls[el], depth) }.join("")
|
48
|
-
end
|
49
|
-
|
50
|
-
cls ? "<span class='#{cls}'>#{val}</span>" : val
|
51
|
-
end
|
52
|
-
end
|
File without changes
|