food_fish_parser 0.3.7 → 0.3.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/lib/food_fish_parser/flat/aquac_method.rb +26 -0
- data/lib/food_fish_parser/flat/{fao_region.rb → area_fao.rb} +2 -2
- data/lib/food_fish_parser/flat/area_name.rb +64 -0
- data/lib/food_fish_parser/flat/catch_method.rb +42 -0
- data/lib/food_fish_parser/flat/fish_name.rb +5 -4
- data/lib/food_fish_parser/flat/fish_name_latin.rb +1 -1
- data/lib/food_fish_parser/flat/fish_name_nl.rb +4 -4
- data/lib/food_fish_parser/flat/kind.rb +6 -4
- data/lib/food_fish_parser/flat/parser.rb +16 -12
- data/lib/food_fish_parser/strict/grammar/catch_area.treetop +6 -2
- data/lib/food_fish_parser/strict/grammar/fish_allergen.treetop +1 -1
- data/lib/food_fish_parser/strict/grammar/fish_name.treetop +3 -3
- data/lib/food_fish_parser/strict/grammar/fish_name_latin.treetop +1 -1
- data/lib/food_fish_parser/strict/grammar/fish_name_nl.treetop +11 -5
- data/lib/food_fish_parser/strict/grammar/root.treetop +3 -2
- data/lib/food_fish_parser/version.rb +2 -2
- metadata +6 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1276f2546a66d95c826ec9abe326186077010c0186a8bcdeec5a6fa6595091b6
|
4
|
+
data.tar.gz: 3702ffd5cc2f17f75e936c4938b61a33e4688f15e30677dd6e9130af21a51f3f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 141366cd9da78e28a6cfaa026e3a66c349e9ba09a06692cc01ea6df19c0e846b09ce10c9f92c83143777737cd5ae4dcb5f67db8c48d4e8ea359cea5925b08266
|
7
|
+
data.tar.gz: 417463cfc6470b951ece7fadf0d50bae63e209933775d9bce974019674acddd3855eeb5fda26024851d17ab39b4d0f3f1ef3deef8ebbe82489e2c8d8a3a1b0bc
|
data/README.md
CHANGED
@@ -121,7 +121,7 @@ FAO regions and fish names found.
|
|
121
121
|
require 'food_fish_parser'
|
122
122
|
|
123
123
|
parser = FoodFishParser::Flat::Parser.new
|
124
|
-
s = "Foobar zalm (salmo salar) *&! gevangen pangasius spp FAO 61 ?or ?FAO 67 what more.")
|
124
|
+
s = "Foobar zalm (salmo salar) *&! gevangen met lijnen pangasius spp FAO 61 ?or ?FAO 67 what more.")
|
125
125
|
puts parser.parse(s).to_a.inspect
|
126
126
|
```
|
127
127
|
|
@@ -136,7 +136,7 @@ puts parser.parse(s).to_a.inspect
|
|
136
136
|
{ :name=>nil, :fao_codes=>["61"] },
|
137
137
|
{ :name=>nil, :fao_codes=>["67"] }
|
138
138
|
],
|
139
|
-
:catch_methods => [],
|
139
|
+
:catch_methods => [{ :text=>"lijnen" }],
|
140
140
|
:aquaculture_areas => [],
|
141
141
|
:aquaculture_methods => []
|
142
142
|
}
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module FoodFishParser
|
2
|
+
module Flat
|
3
|
+
module AquacMethod
|
4
|
+
|
5
|
+
REGEX = /
|
6
|
+
(
|
7
|
+
kwekerij(?:en)? |
|
8
|
+
\bkweekvijvers? |
|
9
|
+
\bvijvers?(?:\s*\/\s*bassins?)? |
|
10
|
+
\b(?:open\s+)? kooien (?: \s+in\s+zee | \s+in\s+rivier )? |
|
11
|
+
\bdoorstroomsystemen |
|
12
|
+
\b(?:open\s+)? hangcultuur |
|
13
|
+
\bgekweekt\s+in\s+netten
|
14
|
+
)
|
15
|
+
\b
|
16
|
+
/ix
|
17
|
+
|
18
|
+
def self.find_all(text)
|
19
|
+
text
|
20
|
+
.scan(REGEX)
|
21
|
+
.flatten
|
22
|
+
.map {|s| { text: s } }
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
@@ -1,6 +1,6 @@
|
|
1
1
|
module FoodFishParser
|
2
2
|
module Flat
|
3
|
-
module
|
3
|
+
module AreaFao
|
4
4
|
|
5
5
|
REGEX_CODE = / 0?\d{2} (?: (?: -0?\d{2} | (?: \/ | - | \s+ | ) [lIV]+ ) [a-d]? )? /ix
|
6
6
|
REGEX = /
|
@@ -28,7 +28,7 @@ module FoodFishParser
|
|
28
28
|
|
29
29
|
def self.find_all(text)
|
30
30
|
find_all_code_groups(text)
|
31
|
-
.map {|a| {
|
31
|
+
.map {|a| { text: nil, fao_codes: a } }
|
32
32
|
end
|
33
33
|
end
|
34
34
|
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
module FoodFishParser
|
2
|
+
module Flat
|
3
|
+
module AreaName
|
4
|
+
|
5
|
+
REGEX_AREA_NAMES = /
|
6
|
+
\b
|
7
|
+
(?:
|
8
|
+
Cornwall |
|
9
|
+
Noorwegen |
|
10
|
+
India |
|
11
|
+
Ierland |
|
12
|
+
Vi[eë]tnam |
|
13
|
+
Indonesi[eë] |
|
14
|
+
Griekenland |
|
15
|
+
Ecuador |
|
16
|
+
Limfjord |
|
17
|
+
Denemarken |
|
18
|
+
Chili |
|
19
|
+
Faroër eilanden |
|
20
|
+
Groenland |
|
21
|
+
Californi[eë] |
|
22
|
+
Malediven |
|
23
|
+
Spitsbergen |
|
24
|
+
Bereneiland |
|
25
|
+
Maleisi[eë] |
|
26
|
+
(?:
|
27
|
+
atlantische | atlantic | pacifische | pacific | stille | noorse |
|
28
|
+
middelandse | indische
|
29
|
+
) \s+ (?: oceaan | ocean | zee) |
|
30
|
+
Noordzee |
|
31
|
+
Barentszzee |
|
32
|
+
Ijsland
|
33
|
+
)
|
34
|
+
\b
|
35
|
+
/ix
|
36
|
+
|
37
|
+
REGEX_AREA = /
|
38
|
+
\b
|
39
|
+
(
|
40
|
+
(?: (?: rond | bij | aan | voor ) \s+ )?
|
41
|
+
(?: de\s+ (?: kust | fjorden ) \s+van\s+ )?
|
42
|
+
(?: (?:\(sub\))?tropische\s+wateren\s+in\s+ )?
|
43
|
+
(?: (?: de | het ) \s+ )?
|
44
|
+
(?: (?: \s* (?:noord|zuid|oost|west)-? )+(?:elijke?\s+(?: deel | gedeelte )\s+van)? \s+ )?
|
45
|
+
(?: (?: de | het ) \s+ )?
|
46
|
+
#{REGEX_AREA_NAMES}
|
47
|
+
)
|
48
|
+
\b
|
49
|
+
/ix
|
50
|
+
|
51
|
+
REGEX = /
|
52
|
+
\b#{REGEX_AREA} (?: (?: \s+(?:en|of|en\/of)\s+ | \s*,\s* ) #{REGEX_AREA} )* \b
|
53
|
+
/ix
|
54
|
+
|
55
|
+
def self.find_all(text)
|
56
|
+
text
|
57
|
+
.scan(REGEX)
|
58
|
+
.flatten
|
59
|
+
.compact
|
60
|
+
.map {|s| { text: s.strip.sub(/^(de|het|een)\s*/, ''), fao_codes: [] } }
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
module FoodFishParser
|
2
|
+
module Flat
|
3
|
+
module CatchMethod
|
4
|
+
|
5
|
+
REGEX = /
|
6
|
+
\b
|
7
|
+
(
|
8
|
+
trawlnet(?:ten)? |
|
9
|
+
trawler |
|
10
|
+
haken\s+en\s+lijnen |
|
11
|
+
(machinale\s+)? handlijnen |
|
12
|
+
ring-\s+en\s+kruisnet(?:ten)? |
|
13
|
+
ringnet(?:ten)? |
|
14
|
+
kruisnet(?:ten)? |
|
15
|
+
sleeplijn(?:en)? |
|
16
|
+
kieuwnet(?:ten)? |
|
17
|
+
(?:soortgelijke|vergelijkbare)\s+net(?:ten)? |
|
18
|
+
(?:zwevende\s+)? (?:bodem)?(?:otter)?trawls? |
|
19
|
+
(?:deense\s+)? zegens? |
|
20
|
+
ringzegens? |
|
21
|
+
hengels?\s+en\s+lijn(?:en)? |
|
22
|
+
met\s+de\s+hengel |
|
23
|
+
met\s+lijnen |
|
24
|
+
vallen |
|
25
|
+
purse\s*seiner
|
26
|
+
) (?:visserij)?
|
27
|
+
\b
|
28
|
+
/ix
|
29
|
+
|
30
|
+
def self.find_all(text)
|
31
|
+
text
|
32
|
+
.scan(REGEX)
|
33
|
+
.flatten
|
34
|
+
.compact
|
35
|
+
.map do |s|
|
36
|
+
s.gsub!(/^met\b(\s+de\b)?\s*/, '')
|
37
|
+
{ text: s }
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
@@ -4,14 +4,15 @@ require_relative 'fish_name_nl'
|
|
4
4
|
module FoodFishParser
|
5
5
|
module Flat
|
6
6
|
module FishName
|
7
|
-
|
7
|
+
REGEX_ALLERGEN_NAMES = /\b(?:visvlees|vis|weekdieren|schaaldieren)\b/i
|
8
|
+
REGEX_ALLERGEN = /\s*(?: \(\s*#{REGEX_ALLERGEN_NAMES}\s*\) | \b,?\s*#{REGEX_ALLERGEN_NAMES}\s*,?\s*\b )/ix
|
8
9
|
|
9
10
|
REGEX = /
|
10
11
|
(?:
|
11
|
-
#{FishNameNL::REGEX} (?: #{
|
12
|
-
#{FishNameNL::REGEX} \s*,?\s* \b #{FishNameLatin::REGEX} |
|
12
|
+
(?!#{REGEX_ALLERGEN}) #{FishNameNL::REGEX} (?: #{REGEX_ALLERGEN} )? \s* \( \s* #{FishNameLatin::REGEX} \s* \) |
|
13
|
+
(?!#{REGEX_ALLERGEN}) #{FishNameNL::REGEX} \s*,?\s* \b #{FishNameLatin::REGEX} |
|
13
14
|
#{FishNameLatin::REGEX} |
|
14
|
-
#{FishNameNL::REGEX}
|
15
|
+
(?!#{REGEX_ALLERGEN}) #{FishNameNL::REGEX}
|
15
16
|
)
|
16
17
|
/ix
|
17
18
|
|
@@ -1,11 +1,11 @@
|
|
1
|
-
# autogenerated by species-treetop-gen-nl.rb on 2020-03-
|
1
|
+
# autogenerated by species-treetop-gen-nl.rb on 2020-03-31
|
2
2
|
module FoodFishParser
|
3
3
|
module Flat
|
4
4
|
module FishNameNL
|
5
|
-
REGEX_AREA = /pacifische|noorse|japanse|indische|ijsselmeer|groenlandse|atlantische|argentijnse|alaska/i
|
5
|
+
REGEX_AREA = /pacifische|pacific|noorse|japanse|indische|ijsselmeer|groenlandse|atlantische|argentijnse|alaska/i
|
6
6
|
REGEX_ATTR = /zwarte|zwart|witte|witpoot|wit|roze|rood|rode|rivier|pijl|kleine|klein|grote|groot|groene|groen|grise|doorn|coho|chum|blonde|blond|blauwe|blauw/i
|
7
|
-
REGEX_NAME = /zwaardvis|zonnevis|zeewolf|zeesnoek|zeekreeft|zeeforel|zeebaars|zalm|wijting|weekdieren|weekdier|vliegende\ vis|vis|vintonijn|vin\ tonijn|tonijn\ albacore|tonijn|tong|tilapia|thon\ albacore|tarbot|tapijtschelp|surimi|steur|st\.\ pierre|sprot|spie|snotolf|snoekbaars|snoek|snapper|sliptong|skrei|skipjack\ tuna|skipjack\ tonijn|skipjack|seabob\ garnalen|schol|schelvis|schelpen|schelp|schar|sardines|sardinen|sardinelle|sardine|sardienen|sardien|saibling|rouget|rogvleugel|riddervis|regenboogforel|red\ snapper|red\ grouper|raat|poon|pollock|pollak|pilchard|pieterman|pangasius|paling|pagre|oogtonijn|nijlbaars|neustong|mul|mosselen|mossel|meerval|marlijn|mantelschelp|makreel|mahi\-mahi|mahi\ mahi|maatjesha|lom|leng|kreeft|krab|koolvis|kokkel|king\ clip|karper|kabeljauw|inktvis|hondstong|hoki|heilbot|heekrug|heek|haring|harder\ diklip|hamvis|hake|haai|gruis|grouper|griet|geep|geelvintonijn|geelvin\ tonijn|garnalen|garnaal|forel|fint|escolar|dorade|diklipharder|diklip\ harder|daurade|corvina|coquilles|conger\ aal|cocquilles|cobia|claresse|caviaar|braadha|botervis|bot|blood\ snapper|beekridder|barracumundi|barracuda|bacalao|baars|arkschelp|ansovis|ansjovis|alfonsino|albacore\ tonijn/i
|
8
|
-
REGEX_SUFX = /wang|vlees|steur|ringen|ring|moten|kaviaar|filets|filetes|filet|eiwit/i
|
7
|
+
REGEX_NAME = /zwaardvis|zonnevis|zeewolf|zeesnoek|zeekreeft|zeeforel|zeebaars|zalm|wijting|weekdieren|weekdier|vliegende\ vis|vis|vintonijn|vin\ tonijn|tonijn\ albacore|tonijn|tong|tilapia|thon\ albacore|tarbot|tapijtschelp|surimi|steur|st\.\ pierre|sprot|spie|sockeye\ zalm|snotolf|snoekbaars|snoek|snapper|sliptong|skrei|skipjack\ tuna|skipjack\ tonijn|skipjack|seabob\ garnalen|schol|schelvis|schelpen|schelp|schar|sardines|sardinen|sardinelle|sardine|sardienen|sardien|saibling|rouget|rogvleugel|riddervis|regenboogforel|red\ snapper|red\ grouper|raat|poon|pollock|pollak|pilchard|pieterman|pangasius|paling|pagre|oogtonijn|nijlbaars|neustong|mul|mosselen|mossel|meerval|marlijn|mantelschelp|makreel|mahi\-mahi|mahi\ mahi|maatjesha|lom|leng|kreeft|krab|koolvis|kokkel|king\ clip|karper|kabeljauw|inktvis|hondstong|hoki|heilbot|heekrug|heek|haring|harder\ diklip|hamvis|hake|haai|gruis|grouper|griet|geep|geelvintonijn|geelvin\ tonijn|garnalen|garnaal|forel|fint|escolar|dorade|diklipharder|diklip\ harder|daurade|corvina|coquilles|conger\ aal|cocquilles|cobia|claresse|caviaar|braadha|botervis|bot|blood\ snapper|beekridder|barracumundi|barracuda|bacalao|baars|arkschelp|ansovis|ansjovis|alfonsino|albacore\ tonijn/i
|
8
|
+
REGEX_SUFX = /wang|vlees|steur|ringen|ring|moten|lever|kaviaar|filets|filetes|filet|eiwit/i
|
9
9
|
|
10
10
|
REGEX = /
|
11
11
|
(?: \b verse \s+ | \b kaviaar \s+ | \b kaviaar \s+ van \s+ | \b gevilde \s+ )?
|
@@ -9,10 +9,11 @@ module FoodFishParser
|
|
9
9
|
(?:
|
10
10
|
gevangen |
|
11
11
|
visgebied |
|
12
|
-
|
13
|
-
|
12
|
+
vangst?gebied |
|
13
|
+
visvangst |
|
14
14
|
betrapt \s+ bij |
|
15
|
-
wilde? \s+ #{FishNameNL::REGEX}
|
15
|
+
wilde? \s+ #{FishNameNL::REGEX} |
|
16
|
+
MSC # certificate is for wild fish only
|
16
17
|
)
|
17
18
|
\b
|
18
19
|
/ix
|
@@ -24,7 +25,8 @@ module FoodFishParser
|
|
24
25
|
aquacultuurproduct |
|
25
26
|
aquacultuur \s+ product |
|
26
27
|
kweekmethode |
|
27
|
-
kweekmethoden
|
28
|
+
kweekmethoden |
|
29
|
+
ASC # certificate is for aquaculture fish only
|
28
30
|
)
|
29
31
|
\b
|
30
32
|
/ix
|
@@ -1,5 +1,8 @@
|
|
1
1
|
require_relative 'fish_name'
|
2
|
-
require_relative 'fao_region'
|
2
|
+
require_relative 'area_fao'
|
3
|
+
require_relative 'area_name'
|
4
|
+
require_relative 'catch_method'
|
5
|
+
require_relative 'aquac_method'
|
3
6
|
require_relative 'kind'
|
4
7
|
|
5
8
|
module FoodFishParser
|
@@ -17,31 +20,32 @@ module FoodFishParser
|
|
17
20
|
# @return [Array<Hash>] structured representation of fish details (maximum one item)
|
18
21
|
def parse(s, **options)
|
19
22
|
names = FishName.find_all(s)
|
20
|
-
areas =
|
21
|
-
|
23
|
+
areas = AreaName.find_all(s) + AreaFao.find_all(s)
|
24
|
+
catch_methods = CatchMethod.find_all(s)
|
25
|
+
aquac_methods = AquacMethod.find_all(s)
|
22
26
|
|
23
|
-
is_wild = Kind.is_wild?(s)
|
24
|
-
is_aquac = Kind.is_aquac?(s)
|
27
|
+
is_wild = catch_methods.any? || Kind.is_wild?(s)
|
28
|
+
is_aquac = aquac_methods.any? || Kind.is_aquac?(s)
|
25
29
|
|
26
|
-
return [] unless names.any? || areas.any?
|
30
|
+
return [] unless names.any? || aquac_methods.any? || catch_methods.any? || areas.any?
|
27
31
|
|
28
32
|
attrs = {
|
29
33
|
names: names,
|
30
34
|
catch_areas: [],
|
31
|
-
catch_methods:
|
35
|
+
catch_methods: catch_methods,
|
32
36
|
aquaculture_areas: [],
|
33
|
-
aquaculture_methods:
|
37
|
+
aquaculture_methods: aquac_methods
|
34
38
|
}
|
35
39
|
|
36
40
|
if is_wild && !is_aquac
|
37
|
-
[attrs.merge(catch_areas: areas
|
41
|
+
[attrs.merge(catch_areas: areas)]
|
38
42
|
elsif !is_wild && is_aquac
|
39
|
-
[attrs.merge(aquaculture_areas: areas
|
40
|
-
elsif areas.any?
|
43
|
+
[attrs.merge(aquaculture_areas: areas)]
|
44
|
+
elsif areas.any?
|
41
45
|
# We have a problem: either there are multiple fish and they're a mix of
|
42
46
|
# wild and aquaculture fish, or there is no such indication at all.
|
43
47
|
# For now, we return it in a non-standard way (this needs to be tackled).
|
44
|
-
[attrs.merge(areas: areas
|
48
|
+
[attrs.merge(areas: areas)]
|
45
49
|
else
|
46
50
|
# just names
|
47
51
|
[attrs]
|
@@ -7,7 +7,7 @@ module FoodFishParser::Strict::Grammar
|
|
7
7
|
rule catch_area_indicator
|
8
8
|
( ( comma ws* )? ( 'wildfang'i / 'wild'i ) ws* ( comma ws* )? )?
|
9
9
|
(
|
10
|
-
'gevangen'i ws+
|
10
|
+
'gevangen'i ws+ catch_area_indicator_preposition /
|
11
11
|
'visgebied'i / 'vangstgebied'i / 'vangsgebied'i / 'fanggebiet'i /
|
12
12
|
'betrapt bij'i
|
13
13
|
)
|
@@ -17,7 +17,11 @@ module FoodFishParser::Strict::Grammar
|
|
17
17
|
|
18
18
|
rule catch_area_indicator_short
|
19
19
|
catch_area_indicator /
|
20
|
-
|
20
|
+
catch_area_indicator_preposition !char ( ws* ':' )?
|
21
|
+
end
|
22
|
+
|
23
|
+
rule catch_area_indicator_preposition
|
24
|
+
'in'i / 'op'i / 'voor'i
|
21
25
|
end
|
22
26
|
|
23
27
|
rule catch_area_content
|
@@ -6,7 +6,7 @@ module FoodFishParser::Strict::Grammar
|
|
6
6
|
include FishNameNL
|
7
7
|
|
8
8
|
rule fish_name_both
|
9
|
-
( fish_name_nl ws* '(' ( ws* fish_allergen )? ws* fish_name_latin ( ws* ')' / comma )? )
|
9
|
+
( !fish_allergen fish_name_nl ws* '(' ( ws* fish_allergen )? ws* fish_name_latin ( ws* ')' / comma )? )
|
10
10
|
end
|
11
11
|
|
12
12
|
rule fish_name_both_list
|
@@ -20,8 +20,8 @@ module FoodFishParser::Strict::Grammar
|
|
20
20
|
end
|
21
21
|
|
22
22
|
rule fish_name_nl_list
|
23
|
-
( fish_name_nl <FishNameNode> )
|
24
|
-
( ws+ and_or ws+ fish_name_nl <FishNameNode> )*
|
23
|
+
( !fish_allergen fish_name_nl <FishNameNode> )
|
24
|
+
( ws+ and_or ws+ !fish_allergen fish_name_nl <FishNameNode> )*
|
25
25
|
end
|
26
26
|
|
27
27
|
rule fish_name_any_list
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# autogenerated by species-treetop-gen-nl.rb on 2020-03-
|
1
|
+
# autogenerated by species-treetop-gen-nl.rb on 2020-03-31
|
2
2
|
module FoodFishParser::Strict::Grammar
|
3
3
|
grammar FishNameNL
|
4
4
|
include Common
|
@@ -7,13 +7,19 @@ module FoodFishParser::Strict::Grammar
|
|
7
7
|
rule fish_name_nl
|
8
8
|
'(' ws* fish_name_nl ws* ')' /
|
9
9
|
( 'verse'i ws+ / 'kaviaar'i ws+ 'van'i ws+ / 'kaviaar'i ws+ / 'gevilde'i ws+ )?
|
10
|
-
(
|
10
|
+
(
|
11
|
+
fish_name_nl_area ws+ fish_name_nl_attr ws+ /
|
12
|
+
fish_name_nl_attr ws+ fish_name_nl_area ws+ /
|
13
|
+
fish_name_nl_area ws+ /
|
14
|
+
fish_name_nl_attr ws+
|
15
|
+
)?
|
16
|
+
fish_name_nl_name fish_name_nl_suffix?
|
11
17
|
( ws* fish_allergen )?
|
12
18
|
<FishNameCommonNode>
|
13
19
|
end
|
14
20
|
|
15
21
|
rule fish_name_nl_area
|
16
|
-
'pacifische'i / 'noorse'i / 'japanse'i / 'indische'i / 'ijsselmeer'i / 'groenlandse'i / 'atlantische'i / 'argentijnse'i / 'alaska'i
|
22
|
+
'pacifische'i / 'pacific'i / 'noorse'i / 'japanse'i / 'indische'i / 'ijsselmeer'i / 'groenlandse'i / 'atlantische'i / 'argentijnse'i / 'alaska'i
|
17
23
|
end
|
18
24
|
|
19
25
|
rule fish_name_nl_attr
|
@@ -21,11 +27,11 @@ module FoodFishParser::Strict::Grammar
|
|
21
27
|
end
|
22
28
|
|
23
29
|
rule fish_name_nl_name
|
24
|
-
'zwaardvis'i / 'zonnevis'i / 'zeewolf'i / 'zeesnoek'i / 'zeekreeft'i / 'zeeforel'i / 'zeebaars'i / 'zalm'i / 'wijting'i / 'weekdieren'i / 'weekdier'i / 'vliegende vis'i / 'vis'i / 'vintonijn'i / 'vin tonijn'i / 'tonijn albacore'i / 'tonijn'i / 'tong'i / 'tilapia'i / 'thon albacore'i / 'tarbot'i / 'tapijtschelp'i / 'surimi'i / 'steur'i / 'st. pierre'i / 'sprot'i / 'spie'i / 'snotolf'i / 'snoekbaars'i / 'snoek'i / 'snapper'i / 'sliptong'i / 'skrei'i / 'skipjack tuna'i / 'skipjack tonijn'i / 'skipjack'i / 'seabob garnalen'i / 'schol'i / 'schelvis'i / 'schelpen'i / 'schelp'i / 'schar'i / 'sardines'i / 'sardinen'i / 'sardinelle'i / 'sardine'i / 'sardienen'i / 'sardien'i / 'saibling'i / 'rouget'i / 'rogvleugel'i / 'riddervis'i / 'regenboogforel'i / 'red snapper'i / 'red grouper'i / 'raat'i / 'poon'i / 'pollock'i / 'pollak'i / 'pilchard'i / 'pieterman'i / 'pangasius'i / 'paling'i / 'pagre'i / 'oogtonijn'i / 'nijlbaars'i / 'neustong'i / 'mul'i / 'mosselen'i / 'mossel'i / 'meerval'i / 'marlijn'i / 'mantelschelp'i / 'makreel'i / 'mahi-mahi'i / 'mahi mahi'i / 'maatjesha'i / 'lom'i / 'leng'i / 'kreeft'i / 'krab'i / 'koolvis'i / 'kokkel'i / 'king clip'i / 'karper'i / 'kabeljauw'i / 'inktvis'i / 'hondstong'i / 'hoki'i / 'heilbot'i / 'heekrug'i / 'heek'i / 'haring'i / 'harder diklip'i / 'hamvis'i / 'hake'i / 'haai'i / 'gruis'i / 'grouper'i / 'griet'i / 'geep'i / 'geelvintonijn'i / 'geelvin tonijn'i / 'garnalen'i / 'garnaal'i / 'forel'i / 'fint'i / 'escolar'i / 'dorade'i / 'diklipharder'i / 'diklip harder'i / 'daurade'i / 'corvina'i / 'coquilles'i / 'conger aal'i / 'cocquilles'i / 'cobia'i / 'claresse'i / 'caviaar'i / 'braadha'i / 'botervis'i / 'bot'i / 'blood snapper'i / 'beekridder'i / 'barracumundi'i / 'barracuda'i / 'bacalao'i / 'baars'i / 'arkschelp'i / 'ansovis'i / 'ansjovis'i / 'alfonsino'i / 'albacore tonijn'i
|
30
|
+
'zwaardvis'i / 'zonnevis'i / 'zeewolf'i / 'zeesnoek'i / 'zeekreeft'i / 'zeeforel'i / 'zeebaars'i / 'zalm'i / 'wijting'i / 'weekdieren'i / 'weekdier'i / 'vliegende vis'i / 'vis'i / 'vintonijn'i / 'vin tonijn'i / 'tonijn albacore'i / 'tonijn'i / 'tong'i / 'tilapia'i / 'thon albacore'i / 'tarbot'i / 'tapijtschelp'i / 'surimi'i / 'steur'i / 'st. pierre'i / 'sprot'i / 'spie'i / 'sockeye zalm'i / 'snotolf'i / 'snoekbaars'i / 'snoek'i / 'snapper'i / 'sliptong'i / 'skrei'i / 'skipjack tuna'i / 'skipjack tonijn'i / 'skipjack'i / 'seabob garnalen'i / 'schol'i / 'schelvis'i / 'schelpen'i / 'schelp'i / 'schar'i / 'sardines'i / 'sardinen'i / 'sardinelle'i / 'sardine'i / 'sardienen'i / 'sardien'i / 'saibling'i / 'rouget'i / 'rogvleugel'i / 'riddervis'i / 'regenboogforel'i / 'red snapper'i / 'red grouper'i / 'raat'i / 'poon'i / 'pollock'i / 'pollak'i / 'pilchard'i / 'pieterman'i / 'pangasius'i / 'paling'i / 'pagre'i / 'oogtonijn'i / 'nijlbaars'i / 'neustong'i / 'mul'i / 'mosselen'i / 'mossel'i / 'meerval'i / 'marlijn'i / 'mantelschelp'i / 'makreel'i / 'mahi-mahi'i / 'mahi mahi'i / 'maatjesha'i / 'lom'i / 'leng'i / 'kreeft'i / 'krab'i / 'koolvis'i / 'kokkel'i / 'king clip'i / 'karper'i / 'kabeljauw'i / 'inktvis'i / 'hondstong'i / 'hoki'i / 'heilbot'i / 'heekrug'i / 'heek'i / 'haring'i / 'harder diklip'i / 'hamvis'i / 'hake'i / 'haai'i / 'gruis'i / 'grouper'i / 'griet'i / 'geep'i / 'geelvintonijn'i / 'geelvin tonijn'i / 'garnalen'i / 'garnaal'i / 'forel'i / 'fint'i / 'escolar'i / 'dorade'i / 'diklipharder'i / 'diklip harder'i / 'daurade'i / 'corvina'i / 'coquilles'i / 'conger aal'i / 'cocquilles'i / 'cobia'i / 'claresse'i / 'caviaar'i / 'braadha'i / 'botervis'i / 'bot'i / 'blood snapper'i / 'beekridder'i / 'barracumundi'i / 'barracuda'i / 'bacalao'i / 'baars'i / 'arkschelp'i / 'ansovis'i / 'ansjovis'i / 'alfonsino'i / 'albacore tonijn'i
|
25
31
|
end
|
26
32
|
|
27
33
|
rule fish_name_nl_suffix
|
28
|
-
'wang'i / 'vlees'i / 'steur'i / 'ringen'i / 'ring'i / 'moten'i / 'kaviaar'i / 'filets'i / 'filetes'i / 'filet'i / 'eiwit'i
|
34
|
+
'wang'i / 'vlees'i / 'steur'i / 'ringen'i / 'ring'i / 'moten'i / 'lever'i / 'kaviaar'i / 'filets'i / 'filetes'i / 'filet'i / 'eiwit'i
|
29
35
|
end
|
30
36
|
end
|
31
37
|
end
|
@@ -54,8 +54,9 @@ module FoodFishParser::Strict::Grammar
|
|
54
54
|
# fish with catch or aquaculture info
|
55
55
|
rule fish_with_info
|
56
56
|
(
|
57
|
-
|
58
|
-
( fish_name_any_list ( ws* ( comma / ':' ) )? ws+
|
57
|
+
# @todo move optional '(' after common fish name to root and properly match start and end brackets
|
58
|
+
( fish_name_any_list ( ws* ( comma / ':' ) )? ws+ fish_catch_info ( ws* ')' )? ) /
|
59
|
+
( fish_name_any_list ( ws* ( comma / ':' ) )? ws+ fish_aquac_info ( ws* ')' )? )
|
59
60
|
)
|
60
61
|
<FishNode>
|
61
62
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: food_fish_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.7
|
4
|
+
version: 0.3.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- wvengen
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-03-
|
11
|
+
date: 2020-03-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: treetop
|
@@ -43,7 +43,10 @@ files:
|
|
43
43
|
- bin/food_fish_parser
|
44
44
|
- food_fish_parser.gemspec
|
45
45
|
- lib/food_fish_parser.rb
|
46
|
-
- lib/food_fish_parser/flat/fao_region.rb
|
46
|
+
- lib/food_fish_parser/flat/aquac_method.rb
|
47
|
+
- lib/food_fish_parser/flat/area_fao.rb
|
48
|
+
- lib/food_fish_parser/flat/area_name.rb
|
49
|
+
- lib/food_fish_parser/flat/catch_method.rb
|
47
50
|
- lib/food_fish_parser/flat/fish_name.rb
|
48
51
|
- lib/food_fish_parser/flat/fish_name_latin.rb
|
49
52
|
- lib/food_fish_parser/flat/fish_name_nl.rb
|