food_fish_parser 0.3.7 → 0.3.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/lib/food_fish_parser/flat/aquac_method.rb +26 -0
- data/lib/food_fish_parser/flat/{fao_region.rb → area_fao.rb} +2 -2
- data/lib/food_fish_parser/flat/area_name.rb +64 -0
- data/lib/food_fish_parser/flat/catch_method.rb +42 -0
- data/lib/food_fish_parser/flat/fish_name.rb +5 -4
- data/lib/food_fish_parser/flat/fish_name_latin.rb +1 -1
- data/lib/food_fish_parser/flat/fish_name_nl.rb +4 -4
- data/lib/food_fish_parser/flat/kind.rb +6 -4
- data/lib/food_fish_parser/flat/parser.rb +16 -12
- data/lib/food_fish_parser/strict/grammar/catch_area.treetop +6 -2
- data/lib/food_fish_parser/strict/grammar/fish_allergen.treetop +1 -1
- data/lib/food_fish_parser/strict/grammar/fish_name.treetop +3 -3
- data/lib/food_fish_parser/strict/grammar/fish_name_latin.treetop +1 -1
- data/lib/food_fish_parser/strict/grammar/fish_name_nl.treetop +11 -5
- data/lib/food_fish_parser/strict/grammar/root.treetop +3 -2
- data/lib/food_fish_parser/version.rb +2 -2
- metadata +6 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1276f2546a66d95c826ec9abe326186077010c0186a8bcdeec5a6fa6595091b6
|
4
|
+
data.tar.gz: 3702ffd5cc2f17f75e936c4938b61a33e4688f15e30677dd6e9130af21a51f3f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 141366cd9da78e28a6cfaa026e3a66c349e9ba09a06692cc01ea6df19c0e846b09ce10c9f92c83143777737cd5ae4dcb5f67db8c48d4e8ea359cea5925b08266
|
7
|
+
data.tar.gz: 417463cfc6470b951ece7fadf0d50bae63e209933775d9bce974019674acddd3855eeb5fda26024851d17ab39b4d0f3f1ef3deef8ebbe82489e2c8d8a3a1b0bc
|
data/README.md
CHANGED
@@ -121,7 +121,7 @@ FAO regions and fish names found.
|
|
121
121
|
require 'food_fish_parser'
|
122
122
|
|
123
123
|
parser = FoodFishParser::Flat::Parser.new
|
124
|
-
s = "Foobar zalm (salmo salar) *&! gevangen pangasius spp FAO 61 ?or ?FAO 67 what more.")
|
124
|
+
s = "Foobar zalm (salmo salar) *&! gevangen met lijnen pangasius spp FAO 61 ?or ?FAO 67 what more.")
|
125
125
|
puts parser.parse(s).to_a.inspect
|
126
126
|
```
|
127
127
|
|
@@ -136,7 +136,7 @@ puts parser.parse(s).to_a.inspect
|
|
136
136
|
{ :name=>nil, :fao_codes=>["61"] },
|
137
137
|
{ :name=>nil, :fao_codes=>["67"] }
|
138
138
|
],
|
139
|
-
:catch_methods => [],
|
139
|
+
:catch_methods => [{ :text=>"lijnen" }],
|
140
140
|
:aquaculture_areas => [],
|
141
141
|
:aquaculture_methods => []
|
142
142
|
}
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module FoodFishParser
|
2
|
+
module Flat
|
3
|
+
module AquacMethod
|
4
|
+
|
5
|
+
REGEX = /
|
6
|
+
(
|
7
|
+
kwekerij(?:en)? |
|
8
|
+
\bkweekvijvers? |
|
9
|
+
\bvijvers?(?:\s*\/\s*bassins?)? |
|
10
|
+
\b(?:open\s+)? kooien (?: \s+in\s+zee | \s+in\s+rivier )? |
|
11
|
+
\bdoorstroomsystemen |
|
12
|
+
\b(?:open\s+)? hangcultuur |
|
13
|
+
\bgekweekt\s+in\s+netten
|
14
|
+
)
|
15
|
+
\b
|
16
|
+
/ix
|
17
|
+
|
18
|
+
def self.find_all(text)
|
19
|
+
text
|
20
|
+
.scan(REGEX)
|
21
|
+
.flatten
|
22
|
+
.map {|s| { text: s } }
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
@@ -1,6 +1,6 @@
|
|
1
1
|
module FoodFishParser
|
2
2
|
module Flat
|
3
|
-
module
|
3
|
+
module AreaFao
|
4
4
|
|
5
5
|
REGEX_CODE = / 0?\d{2} (?: (?: -0?\d{2} | (?: \/ | - | \s+ | ) [lIV]+ ) [a-d]? )? /ix
|
6
6
|
REGEX = /
|
@@ -28,7 +28,7 @@ module FoodFishParser
|
|
28
28
|
|
29
29
|
def self.find_all(text)
|
30
30
|
find_all_code_groups(text)
|
31
|
-
.map {|a| {
|
31
|
+
.map {|a| { text: nil, fao_codes: a } }
|
32
32
|
end
|
33
33
|
end
|
34
34
|
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
module FoodFishParser
|
2
|
+
module Flat
|
3
|
+
module AreaName
|
4
|
+
|
5
|
+
REGEX_AREA_NAMES = /
|
6
|
+
\b
|
7
|
+
(?:
|
8
|
+
Cornwall |
|
9
|
+
Noorwegen |
|
10
|
+
India |
|
11
|
+
Ierland |
|
12
|
+
Vi[eë]tnam |
|
13
|
+
Indonesi[eë] |
|
14
|
+
Griekenland |
|
15
|
+
Ecuador |
|
16
|
+
Limfjord |
|
17
|
+
Denemarken |
|
18
|
+
Chili |
|
19
|
+
Faroër eilanden |
|
20
|
+
Groenland |
|
21
|
+
Californi[eë] |
|
22
|
+
Malediven |
|
23
|
+
Spitsbergen |
|
24
|
+
Bereneiland |
|
25
|
+
Maleisi[eë] |
|
26
|
+
(?:
|
27
|
+
atlantische | atlantic | pacifische | pacific | stille | noorse |
|
28
|
+
middelandse | indische
|
29
|
+
) \s+ (?: oceaan | ocean | zee) |
|
30
|
+
Noordzee |
|
31
|
+
Barentszzee |
|
32
|
+
Ijsland
|
33
|
+
)
|
34
|
+
\b
|
35
|
+
/ix
|
36
|
+
|
37
|
+
REGEX_AREA = /
|
38
|
+
\b
|
39
|
+
(
|
40
|
+
(?: (?: rond | bij | aan | voor ) \s+ )?
|
41
|
+
(?: de\s+ (?: kust | fjorden ) \s+van\s+ )?
|
42
|
+
(?: (?:\(sub\))?tropische\s+wateren\s+in\s+ )?
|
43
|
+
(?: (?: de | het ) \s+ )?
|
44
|
+
(?: (?: \s* (?:noord|zuid|oost|west)-? )+(?:elijke?\s+(?: deel | gedeelte )\s+van)? \s+ )?
|
45
|
+
(?: (?: de | het ) \s+ )?
|
46
|
+
#{REGEX_AREA_NAMES}
|
47
|
+
)
|
48
|
+
\b
|
49
|
+
/ix
|
50
|
+
|
51
|
+
REGEX = /
|
52
|
+
\b#{REGEX_AREA} (?: (?: \s+(?:en|of|en\/of)\s+ | \s*,\s* ) #{REGEX_AREA} )* \b
|
53
|
+
/ix
|
54
|
+
|
55
|
+
def self.find_all(text)
|
56
|
+
text
|
57
|
+
.scan(REGEX)
|
58
|
+
.flatten
|
59
|
+
.compact
|
60
|
+
.map {|s| { text: s.strip.sub(/^(de|het|een)\s*/, ''), fao_codes: [] } }
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
module FoodFishParser
|
2
|
+
module Flat
|
3
|
+
module CatchMethod
|
4
|
+
|
5
|
+
REGEX = /
|
6
|
+
\b
|
7
|
+
(
|
8
|
+
trawlnet(?:ten)? |
|
9
|
+
trawler |
|
10
|
+
haken\s+en\s+lijnen |
|
11
|
+
(machinale\s+)? handlijnen |
|
12
|
+
ring-\s+en\s+kruisnet(?:ten)? |
|
13
|
+
ringnet(?:ten)? |
|
14
|
+
kruisnet(?:ten)? |
|
15
|
+
sleeplijn(?:en)? |
|
16
|
+
kieuwnet(?:ten)? |
|
17
|
+
(?:soortgelijke|vergelijkbare)\s+net(?:ten)? |
|
18
|
+
(?:zwevende\s+)? (?:bodem)?(?:otter)?trawls? |
|
19
|
+
(?:deense\s+)? zegens? |
|
20
|
+
ringzegens? |
|
21
|
+
hengels?\s+en\s+lijn(?:en)? |
|
22
|
+
met\s+de\s+hengel |
|
23
|
+
met\s+lijnen |
|
24
|
+
vallen |
|
25
|
+
purse\s*seiner
|
26
|
+
) (?:visserij)?
|
27
|
+
\b
|
28
|
+
/ix
|
29
|
+
|
30
|
+
def self.find_all(text)
|
31
|
+
text
|
32
|
+
.scan(REGEX)
|
33
|
+
.flatten
|
34
|
+
.compact
|
35
|
+
.map do |s|
|
36
|
+
s.gsub!(/^met\b(\s+de\b)?\s*/, '')
|
37
|
+
{ text: s }
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
@@ -4,14 +4,15 @@ require_relative 'fish_name_nl'
|
|
4
4
|
module FoodFishParser
|
5
5
|
module Flat
|
6
6
|
module FishName
|
7
|
-
|
7
|
+
REGEX_ALLERGEN_NAMES = /\b(?:visvlees|vis|weekdieren|schaaldieren)\b/i
|
8
|
+
REGEX_ALLERGEN = /\s*(?: \(\s*#{REGEX_ALLERGEN_NAMES}\s*\) | \b,?\s*#{REGEX_ALLERGEN_NAMES}\s*,?\s*\b )/ix
|
8
9
|
|
9
10
|
REGEX = /
|
10
11
|
(?:
|
11
|
-
#{FishNameNL::REGEX} (?: #{
|
12
|
-
#{FishNameNL::REGEX} \s*,?\s* \b #{FishNameLatin::REGEX} |
|
12
|
+
(?!#{REGEX_ALLERGEN}) #{FishNameNL::REGEX} (?: #{REGEX_ALLERGEN} )? \s* \( \s* #{FishNameLatin::REGEX} \s* \) |
|
13
|
+
(?!#{REGEX_ALLERGEN}) #{FishNameNL::REGEX} \s*,?\s* \b #{FishNameLatin::REGEX} |
|
13
14
|
#{FishNameLatin::REGEX} |
|
14
|
-
#{FishNameNL::REGEX}
|
15
|
+
(?!#{REGEX_ALLERGEN}) #{FishNameNL::REGEX}
|
15
16
|
)
|
16
17
|
/ix
|
17
18
|
|
@@ -1,11 +1,11 @@
|
|
1
|
-
# autogenerated by species-treetop-gen-nl.rb on 2020-03-
|
1
|
+
# autogenerated by species-treetop-gen-nl.rb on 2020-03-31
|
2
2
|
module FoodFishParser
|
3
3
|
module Flat
|
4
4
|
module FishNameNL
|
5
|
-
REGEX_AREA = /pacifische|noorse|japanse|indische|ijsselmeer|groenlandse|atlantische|argentijnse|alaska/i
|
5
|
+
REGEX_AREA = /pacifische|pacific|noorse|japanse|indische|ijsselmeer|groenlandse|atlantische|argentijnse|alaska/i
|
6
6
|
REGEX_ATTR = /zwarte|zwart|witte|witpoot|wit|roze|rood|rode|rivier|pijl|kleine|klein|grote|groot|groene|groen|grise|doorn|coho|chum|blonde|blond|blauwe|blauw/i
|
7
|
-
REGEX_NAME = /zwaardvis|zonnevis|zeewolf|zeesnoek|zeekreeft|zeeforel|zeebaars|zalm|wijting|weekdieren|weekdier|vliegende\ vis|vis|vintonijn|vin\ tonijn|tonijn\ albacore|tonijn|tong|tilapia|thon\ albacore|tarbot|tapijtschelp|surimi|steur|st\.\ pierre|sprot|spie|snotolf|snoekbaars|snoek|snapper|sliptong|skrei|skipjack\ tuna|skipjack\ tonijn|skipjack|seabob\ garnalen|schol|schelvis|schelpen|schelp|schar|sardines|sardinen|sardinelle|sardine|sardienen|sardien|saibling|rouget|rogvleugel|riddervis|regenboogforel|red\ snapper|red\ grouper|raat|poon|pollock|pollak|pilchard|pieterman|pangasius|paling|pagre|oogtonijn|nijlbaars|neustong|mul|mosselen|mossel|meerval|marlijn|mantelschelp|makreel|mahi\-mahi|mahi\ mahi|maatjesha|lom|leng|kreeft|krab|koolvis|kokkel|king\ clip|karper|kabeljauw|inktvis|hondstong|hoki|heilbot|heekrug|heek|haring|harder\ diklip|hamvis|hake|haai|gruis|grouper|griet|geep|geelvintonijn|geelvin\ tonijn|garnalen|garnaal|forel|fint|escolar|dorade|diklipharder|diklip\ harder|daurade|corvina|coquilles|conger\ aal|cocquilles|cobia|claresse|caviaar|braadha|botervis|bot|blood\ snapper|beekridder|barracumundi|barracuda|bacalao|baars|arkschelp|ansovis|ansjovis|alfonsino|albacore\ tonijn/i
|
8
|
-
REGEX_SUFX = /wang|vlees|steur|ringen|ring|moten|kaviaar|filets|filetes|filet|eiwit/i
|
7
|
+
REGEX_NAME = /zwaardvis|zonnevis|zeewolf|zeesnoek|zeekreeft|zeeforel|zeebaars|zalm|wijting|weekdieren|weekdier|vliegende\ vis|vis|vintonijn|vin\ tonijn|tonijn\ albacore|tonijn|tong|tilapia|thon\ albacore|tarbot|tapijtschelp|surimi|steur|st\.\ pierre|sprot|spie|sockeye\ zalm|snotolf|snoekbaars|snoek|snapper|sliptong|skrei|skipjack\ tuna|skipjack\ tonijn|skipjack|seabob\ garnalen|schol|schelvis|schelpen|schelp|schar|sardines|sardinen|sardinelle|sardine|sardienen|sardien|saibling|rouget|rogvleugel|riddervis|regenboogforel|red\ snapper|red\ grouper|raat|poon|pollock|pollak|pilchard|pieterman|pangasius|paling|pagre|oogtonijn|nijlbaars|neustong|mul|mosselen|mossel|meerval|marlijn|mantelschelp|makreel|mahi\-mahi|mahi\ mahi|maatjesha|lom|leng|kreeft|krab|koolvis|kokkel|king\ clip|karper|kabeljauw|inktvis|hondstong|hoki|heilbot|heekrug|heek|haring|harder\ diklip|hamvis|hake|haai|gruis|grouper|griet|geep|geelvintonijn|geelvin\ tonijn|garnalen|garnaal|forel|fint|escolar|dorade|diklipharder|diklip\ harder|daurade|corvina|coquilles|conger\ aal|cocquilles|cobia|claresse|caviaar|braadha|botervis|bot|blood\ snapper|beekridder|barracumundi|barracuda|bacalao|baars|arkschelp|ansovis|ansjovis|alfonsino|albacore\ tonijn/i
|
8
|
+
REGEX_SUFX = /wang|vlees|steur|ringen|ring|moten|lever|kaviaar|filets|filetes|filet|eiwit/i
|
9
9
|
|
10
10
|
REGEX = /
|
11
11
|
(?: \b verse \s+ | \b kaviaar \s+ | \b kaviaar \s+ van \s+ | \b gevilde \s+ )?
|
@@ -9,10 +9,11 @@ module FoodFishParser
|
|
9
9
|
(?:
|
10
10
|
gevangen |
|
11
11
|
visgebied |
|
12
|
-
|
13
|
-
|
12
|
+
vangst?gebied |
|
13
|
+
visvangst |
|
14
14
|
betrapt \s+ bij |
|
15
|
-
wilde? \s+ #{FishNameNL::REGEX}
|
15
|
+
wilde? \s+ #{FishNameNL::REGEX} |
|
16
|
+
MSC # certificate is for wild fish only
|
16
17
|
)
|
17
18
|
\b
|
18
19
|
/ix
|
@@ -24,7 +25,8 @@ module FoodFishParser
|
|
24
25
|
aquacultuurproduct |
|
25
26
|
aquacultuur \s+ product |
|
26
27
|
kweekmethode |
|
27
|
-
kweekmethoden
|
28
|
+
kweekmethoden |
|
29
|
+
ASC # certificate is for aquaculture fish only
|
28
30
|
)
|
29
31
|
\b
|
30
32
|
/ix
|
@@ -1,5 +1,8 @@
|
|
1
1
|
require_relative 'fish_name'
|
2
|
-
require_relative 'fao_region'
|
2
|
+
require_relative 'area_fao'
|
3
|
+
require_relative 'area_name'
|
4
|
+
require_relative 'catch_method'
|
5
|
+
require_relative 'aquac_method'
|
3
6
|
require_relative 'kind'
|
4
7
|
|
5
8
|
module FoodFishParser
|
@@ -17,31 +20,32 @@ module FoodFishParser
|
|
17
20
|
# @return [Array<Hash>] structured representation of fish details (maximum one item)
|
18
21
|
def parse(s, **options)
|
19
22
|
names = FishName.find_all(s)
|
20
|
-
areas =
|
21
|
-
|
23
|
+
areas = AreaName.find_all(s) + AreaFao.find_all(s)
|
24
|
+
catch_methods = CatchMethod.find_all(s)
|
25
|
+
aquac_methods = AquacMethod.find_all(s)
|
22
26
|
|
23
|
-
is_wild = Kind.is_wild?(s)
|
24
|
-
is_aquac = Kind.is_aquac?(s)
|
27
|
+
is_wild = catch_methods.any? || Kind.is_wild?(s)
|
28
|
+
is_aquac = aquac_methods.any? || Kind.is_aquac?(s)
|
25
29
|
|
26
|
-
return [] unless names.any? || areas.any?
|
30
|
+
return [] unless names.any? || aquac_methods.any? || catch_methods.any? || areas.any?
|
27
31
|
|
28
32
|
attrs = {
|
29
33
|
names: names,
|
30
34
|
catch_areas: [],
|
31
|
-
catch_methods:
|
35
|
+
catch_methods: catch_methods,
|
32
36
|
aquaculture_areas: [],
|
33
|
-
aquaculture_methods:
|
37
|
+
aquaculture_methods: aquac_methods
|
34
38
|
}
|
35
39
|
|
36
40
|
if is_wild && !is_aquac
|
37
|
-
[attrs.merge(catch_areas: areas
|
41
|
+
[attrs.merge(catch_areas: areas)]
|
38
42
|
elsif !is_wild && is_aquac
|
39
|
-
[attrs.merge(aquaculture_areas: areas
|
40
|
-
elsif areas.any?
|
43
|
+
[attrs.merge(aquaculture_areas: areas)]
|
44
|
+
elsif areas.any?
|
41
45
|
# We have a problem: either there are multiple fish and they're a mix of
|
42
46
|
# wild and aquaculture fish, or there is no such indication at all.
|
43
47
|
# For now, we return it in a non-standard way (this needs to be tackled).
|
44
|
-
[attrs.merge(areas: areas
|
48
|
+
[attrs.merge(areas: areas)]
|
45
49
|
else
|
46
50
|
# just names
|
47
51
|
[attrs]
|
@@ -7,7 +7,7 @@ module FoodFishParser::Strict::Grammar
|
|
7
7
|
rule catch_area_indicator
|
8
8
|
( ( comma ws* )? ( 'wildfang'i / 'wild'i ) ws* ( comma ws* )? )?
|
9
9
|
(
|
10
|
-
'gevangen'i ws+
|
10
|
+
'gevangen'i ws+ catch_area_indicator_preposition /
|
11
11
|
'visgebied'i / 'vangstgebied'i / 'vangsgebied'i / 'fanggebiet'i /
|
12
12
|
'betrapt bij'i
|
13
13
|
)
|
@@ -17,7 +17,11 @@ module FoodFishParser::Strict::Grammar
|
|
17
17
|
|
18
18
|
rule catch_area_indicator_short
|
19
19
|
catch_area_indicator /
|
20
|
-
|
20
|
+
catch_area_indicator_preposition !char ( ws* ':' )?
|
21
|
+
end
|
22
|
+
|
23
|
+
rule catch_area_indicator_preposition
|
24
|
+
'in'i / 'op'i / 'voor'i
|
21
25
|
end
|
22
26
|
|
23
27
|
rule catch_area_content
|
@@ -6,7 +6,7 @@ module FoodFishParser::Strict::Grammar
|
|
6
6
|
include FishNameNL
|
7
7
|
|
8
8
|
rule fish_name_both
|
9
|
-
( fish_name_nl ws* '(' ( ws* fish_allergen )? ws* fish_name_latin ( ws* ')' / comma )? )
|
9
|
+
( !fish_allergen fish_name_nl ws* '(' ( ws* fish_allergen )? ws* fish_name_latin ( ws* ')' / comma )? )
|
10
10
|
end
|
11
11
|
|
12
12
|
rule fish_name_both_list
|
@@ -20,8 +20,8 @@ module FoodFishParser::Strict::Grammar
|
|
20
20
|
end
|
21
21
|
|
22
22
|
rule fish_name_nl_list
|
23
|
-
( fish_name_nl <FishNameNode> )
|
24
|
-
( ws+ and_or ws+ fish_name_nl <FishNameNode> )*
|
23
|
+
( !fish_allergen fish_name_nl <FishNameNode> )
|
24
|
+
( ws+ and_or ws+ !fish_allergen fish_name_nl <FishNameNode> )*
|
25
25
|
end
|
26
26
|
|
27
27
|
rule fish_name_any_list
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# autogenerated by species-treetop-gen-nl.rb on 2020-03-
|
1
|
+
# autogenerated by species-treetop-gen-nl.rb on 2020-03-31
|
2
2
|
module FoodFishParser::Strict::Grammar
|
3
3
|
grammar FishNameNL
|
4
4
|
include Common
|
@@ -7,13 +7,19 @@ module FoodFishParser::Strict::Grammar
|
|
7
7
|
rule fish_name_nl
|
8
8
|
'(' ws* fish_name_nl ws* ')' /
|
9
9
|
( 'verse'i ws+ / 'kaviaar'i ws+ 'van'i ws+ / 'kaviaar'i ws+ / 'gevilde'i ws+ )?
|
10
|
-
(
|
10
|
+
(
|
11
|
+
fish_name_nl_area ws+ fish_name_nl_attr ws+ /
|
12
|
+
fish_name_nl_attr ws+ fish_name_nl_area ws+ /
|
13
|
+
fish_name_nl_area ws+ /
|
14
|
+
fish_name_nl_attr ws+
|
15
|
+
)?
|
16
|
+
fish_name_nl_name fish_name_nl_suffix?
|
11
17
|
( ws* fish_allergen )?
|
12
18
|
<FishNameCommonNode>
|
13
19
|
end
|
14
20
|
|
15
21
|
rule fish_name_nl_area
|
16
|
-
'pacifische'i / 'noorse'i / 'japanse'i / 'indische'i / 'ijsselmeer'i / 'groenlandse'i / 'atlantische'i / 'argentijnse'i / 'alaska'i
|
22
|
+
'pacifische'i / 'pacific'i / 'noorse'i / 'japanse'i / 'indische'i / 'ijsselmeer'i / 'groenlandse'i / 'atlantische'i / 'argentijnse'i / 'alaska'i
|
17
23
|
end
|
18
24
|
|
19
25
|
rule fish_name_nl_attr
|
@@ -21,11 +27,11 @@ module FoodFishParser::Strict::Grammar
|
|
21
27
|
end
|
22
28
|
|
23
29
|
rule fish_name_nl_name
|
24
|
-
'zwaardvis'i / 'zonnevis'i / 'zeewolf'i / 'zeesnoek'i / 'zeekreeft'i / 'zeeforel'i / 'zeebaars'i / 'zalm'i / 'wijting'i / 'weekdieren'i / 'weekdier'i / 'vliegende vis'i / 'vis'i / 'vintonijn'i / 'vin tonijn'i / 'tonijn albacore'i / 'tonijn'i / 'tong'i / 'tilapia'i / 'thon albacore'i / 'tarbot'i / 'tapijtschelp'i / 'surimi'i / 'steur'i / 'st. pierre'i / 'sprot'i / 'spie'i / 'snotolf'i / 'snoekbaars'i / 'snoek'i / 'snapper'i / 'sliptong'i / 'skrei'i / 'skipjack tuna'i / 'skipjack tonijn'i / 'skipjack'i / 'seabob garnalen'i / 'schol'i / 'schelvis'i / 'schelpen'i / 'schelp'i / 'schar'i / 'sardines'i / 'sardinen'i / 'sardinelle'i / 'sardine'i / 'sardienen'i / 'sardien'i / 'saibling'i / 'rouget'i / 'rogvleugel'i / 'riddervis'i / 'regenboogforel'i / 'red snapper'i / 'red grouper'i / 'raat'i / 'poon'i / 'pollock'i / 'pollak'i / 'pilchard'i / 'pieterman'i / 'pangasius'i / 'paling'i / 'pagre'i / 'oogtonijn'i / 'nijlbaars'i / 'neustong'i / 'mul'i / 'mosselen'i / 'mossel'i / 'meerval'i / 'marlijn'i / 'mantelschelp'i / 'makreel'i / 'mahi-mahi'i / 'mahi mahi'i / 'maatjesha'i / 'lom'i / 'leng'i / 'kreeft'i / 'krab'i / 'koolvis'i / 'kokkel'i / 'king clip'i / 'karper'i / 'kabeljauw'i / 'inktvis'i / 'hondstong'i / 'hoki'i / 'heilbot'i / 'heekrug'i / 'heek'i / 'haring'i / 'harder diklip'i / 'hamvis'i / 'hake'i / 'haai'i / 'gruis'i / 'grouper'i / 'griet'i / 'geep'i / 'geelvintonijn'i / 'geelvin tonijn'i / 'garnalen'i / 'garnaal'i / 'forel'i / 'fint'i / 'escolar'i / 'dorade'i / 'diklipharder'i / 'diklip harder'i / 'daurade'i / 'corvina'i / 'coquilles'i / 'conger aal'i / 'cocquilles'i / 'cobia'i / 'claresse'i / 'caviaar'i / 'braadha'i / 'botervis'i / 'bot'i / 'blood snapper'i / 'beekridder'i / 'barracumundi'i / 'barracuda'i / 'bacalao'i / 'baars'i / 'arkschelp'i / 'ansovis'i / 'ansjovis'i / 'alfonsino'i / 'albacore tonijn'i
|
30
|
+
'zwaardvis'i / 'zonnevis'i / 'zeewolf'i / 'zeesnoek'i / 'zeekreeft'i / 'zeeforel'i / 'zeebaars'i / 'zalm'i / 'wijting'i / 'weekdieren'i / 'weekdier'i / 'vliegende vis'i / 'vis'i / 'vintonijn'i / 'vin tonijn'i / 'tonijn albacore'i / 'tonijn'i / 'tong'i / 'tilapia'i / 'thon albacore'i / 'tarbot'i / 'tapijtschelp'i / 'surimi'i / 'steur'i / 'st. pierre'i / 'sprot'i / 'spie'i / 'sockeye zalm'i / 'snotolf'i / 'snoekbaars'i / 'snoek'i / 'snapper'i / 'sliptong'i / 'skrei'i / 'skipjack tuna'i / 'skipjack tonijn'i / 'skipjack'i / 'seabob garnalen'i / 'schol'i / 'schelvis'i / 'schelpen'i / 'schelp'i / 'schar'i / 'sardines'i / 'sardinen'i / 'sardinelle'i / 'sardine'i / 'sardienen'i / 'sardien'i / 'saibling'i / 'rouget'i / 'rogvleugel'i / 'riddervis'i / 'regenboogforel'i / 'red snapper'i / 'red grouper'i / 'raat'i / 'poon'i / 'pollock'i / 'pollak'i / 'pilchard'i / 'pieterman'i / 'pangasius'i / 'paling'i / 'pagre'i / 'oogtonijn'i / 'nijlbaars'i / 'neustong'i / 'mul'i / 'mosselen'i / 'mossel'i / 'meerval'i / 'marlijn'i / 'mantelschelp'i / 'makreel'i / 'mahi-mahi'i / 'mahi mahi'i / 'maatjesha'i / 'lom'i / 'leng'i / 'kreeft'i / 'krab'i / 'koolvis'i / 'kokkel'i / 'king clip'i / 'karper'i / 'kabeljauw'i / 'inktvis'i / 'hondstong'i / 'hoki'i / 'heilbot'i / 'heekrug'i / 'heek'i / 'haring'i / 'harder diklip'i / 'hamvis'i / 'hake'i / 'haai'i / 'gruis'i / 'grouper'i / 'griet'i / 'geep'i / 'geelvintonijn'i / 'geelvin tonijn'i / 'garnalen'i / 'garnaal'i / 'forel'i / 'fint'i / 'escolar'i / 'dorade'i / 'diklipharder'i / 'diklip harder'i / 'daurade'i / 'corvina'i / 'coquilles'i / 'conger aal'i / 'cocquilles'i / 'cobia'i / 'claresse'i / 'caviaar'i / 'braadha'i / 'botervis'i / 'bot'i / 'blood snapper'i / 'beekridder'i / 'barracumundi'i / 'barracuda'i / 'bacalao'i / 'baars'i / 'arkschelp'i / 'ansovis'i / 'ansjovis'i / 'alfonsino'i / 'albacore tonijn'i
|
25
31
|
end
|
26
32
|
|
27
33
|
rule fish_name_nl_suffix
|
28
|
-
'wang'i / 'vlees'i / 'steur'i / 'ringen'i / 'ring'i / 'moten'i / 'kaviaar'i / 'filets'i / 'filetes'i / 'filet'i / 'eiwit'i
|
34
|
+
'wang'i / 'vlees'i / 'steur'i / 'ringen'i / 'ring'i / 'moten'i / 'lever'i / 'kaviaar'i / 'filets'i / 'filetes'i / 'filet'i / 'eiwit'i
|
29
35
|
end
|
30
36
|
end
|
31
37
|
end
|
@@ -54,8 +54,9 @@ module FoodFishParser::Strict::Grammar
|
|
54
54
|
# fish with catch or aquaculture info
|
55
55
|
rule fish_with_info
|
56
56
|
(
|
57
|
-
|
58
|
-
( fish_name_any_list ( ws* ( comma / ':' ) )? ws+
|
57
|
+
# @todo move optional '(' after common fish name to root and properly match start and end brackets
|
58
|
+
( fish_name_any_list ( ws* ( comma / ':' ) )? ws+ fish_catch_info ( ws* ')' )? ) /
|
59
|
+
( fish_name_any_list ( ws* ( comma / ':' ) )? ws+ fish_aquac_info ( ws* ')' )? )
|
59
60
|
)
|
60
61
|
<FishNode>
|
61
62
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: food_fish_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.7
|
4
|
+
version: 0.3.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- wvengen
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-03-
|
11
|
+
date: 2020-03-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: treetop
|
@@ -43,7 +43,10 @@ files:
|
|
43
43
|
- bin/food_fish_parser
|
44
44
|
- food_fish_parser.gemspec
|
45
45
|
- lib/food_fish_parser.rb
|
46
|
-
- lib/food_fish_parser/flat/fao_region.rb
|
46
|
+
- lib/food_fish_parser/flat/aquac_method.rb
|
47
|
+
- lib/food_fish_parser/flat/area_fao.rb
|
48
|
+
- lib/food_fish_parser/flat/area_name.rb
|
49
|
+
- lib/food_fish_parser/flat/catch_method.rb
|
47
50
|
- lib/food_fish_parser/flat/fish_name.rb
|
48
51
|
- lib/food_fish_parser/flat/fish_name_latin.rb
|
49
52
|
- lib/food_fish_parser/flat/fish_name_nl.rb
|