tiered_category_expressions 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/.travis.yml +5 -0
- data/.yardopts +8 -0
- data/Gemfile +4 -0
- data/LANGREF.md +144 -0
- data/LICENSE.txt +21 -0
- data/README.md +75 -0
- data/Rakefile +22 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/lib/tiered_category_expressions/core.rb +11 -0
- data/lib/tiered_category_expressions/expression.rb +121 -0
- data/lib/tiered_category_expressions/name.rb +22 -0
- data/lib/tiered_category_expressions/namelist.rb +19 -0
- data/lib/tiered_category_expressions/parser.rb +25 -0
- data/lib/tiered_category_expressions/preprocessor.rb +30 -0
- data/lib/tiered_category_expressions/tiers.rb +71 -0
- data/lib/tiered_category_expressions/transformer.rb +26 -0
- data/lib/tiered_category_expressions/util.rb +20 -0
- data/lib/tiered_category_expressions/version.rb +3 -0
- data/lib/tiered_category_expressions.rb +7 -0
- data/tiered_category_expressions.gemspec +39 -0
- metadata +167 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 15ca9e46201d4f51bcc1734b3fbca4486118dc67
|
4
|
+
data.tar.gz: 897342e4cdd92b79264757cbd6a164fb390f3617
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 20d53471af2037ffbbea9df52d5fc62541347af70578cf9ea477d4de5605fac4cb7990fb8913dd43f9225e3a218a41e761356e786d4607583777faa206237513
|
7
|
+
data.tar.gz: 602f8bc18e5e32fa16dabce62c9af90257d8492075fea94e3d89d51e610f578c1e8d82419f6375a53314b49f74eae1a96aefc7f164787ecbacb01681bbb14141
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.4.5
|
data/.travis.yml
ADDED
data/.yardopts
ADDED
data/Gemfile
ADDED
data/LANGREF.md
ADDED
@@ -0,0 +1,144 @@
|
|
1
|
+
# TCE language reference
|
2
|
+
|
3
|
+
## Introduction
|
4
|
+
|
5
|
+
_Tiered category expressions_ (TCEs) are written in a language specifically designed for matching tiered categories.
|
6
|
+
|
7
|
+
A tiered category is defined as a list of tiers where the leftmost tier is the most generic one (root) and each subsequent tier is a specification (child) of its predecessor (parent). In other words, it describes a path through the category tree, starting at the root. For instance:
|
8
|
+
|
9
|
+
```ruby
|
10
|
+
["AGF", "Groente", "Komkommer"]
|
11
|
+
```
|
12
|
+
|
13
|
+
TCEs follow the same structure. They consist of category tier names separated by special separators. Here are some examples to get an idea of what TCEs look like:
|
14
|
+
|
15
|
+
- `agf > groente`
|
16
|
+
- `agf > groente > komkommer`
|
17
|
+
- `agf > groente > kom%`
|
18
|
+
- `agf > groente | fruit > komkommer`
|
19
|
+
- `agf > groente > !tomaat`
|
20
|
+
- `agf >> komkommer`
|
21
|
+
- `>> komkommer`
|
22
|
+
|
23
|
+
All of these examples match the category `["AGF", "Groente", "Komkommer"]`. The syntax is explained in more detail in the [next section](#Syntax).
|
24
|
+
|
25
|
+
When a TCE is matched with a category:
|
26
|
+
|
27
|
+
- Letter case is ignored, e.g. both "NONFOOD" and "NonFood" match "nonfood" and vice versa.
|
28
|
+
- Everything other than alphabetic and numeric characters (including spaces) is ignored, e.g. "nonfood" matches both "non-food" and "non food".
|
29
|
+
- Accents are ignored, e.g. "knäckebröd" matches "knackebrod" and vice versa.
|
30
|
+
- It matches a _subtree_ of categories, e.g. the TCE `"nonfood"` matches the category `["Nonfood"]` as well as `["Nonfood", "Schoonmaak", "Soda"]`.
|
31
|
+
|
32
|
+
Even though they are ignored, tier names in TCEs may contain spaces, upper and lowercase characters and accented characters. However, they **cannot** contain special characters.
|
33
|
+
|
34
|
+
```ruby
|
35
|
+
# Invalid
|
36
|
+
"Brood & deegwaren > Volkoren-knäckebröd"
|
37
|
+
|
38
|
+
# Valid
|
39
|
+
"Brood deegwaren > Volkoren knäckebröd"
|
40
|
+
```
|
41
|
+
|
42
|
+
Note that TCEs are considered equal if they match the same categories. E.g. these TCEs are equal:
|
43
|
+
|
44
|
+
```ruby
|
45
|
+
"brood crackers > knäckebröd"
|
46
|
+
"brood crackers > KNACKEBROD"
|
47
|
+
"broodcrackers > k n ä c k e b r ö d"
|
48
|
+
```
|
49
|
+
|
50
|
+
## Syntax
|
51
|
+
|
52
|
+
### Matching direct children `>`
|
53
|
+
```ruby
|
54
|
+
"agf > groente > komkommer"
|
55
|
+
|
56
|
+
# Matches
|
57
|
+
["AGF", "Groente", "Komkommer"]
|
58
|
+
["AGF", "Groente", "Komkommer", "Snack komkommer"]
|
59
|
+
|
60
|
+
# Does not match
|
61
|
+
["AGF", "Groente"]
|
62
|
+
["AGF", "Groente", "Tomaat"]
|
63
|
+
["Groente & fruit", "Groente", "Komkommer"]
|
64
|
+
```
|
65
|
+
|
66
|
+
### Matching descendants at any depth `>>`
|
67
|
+
```ruby
|
68
|
+
"agf >> komkommer"
|
69
|
+
|
70
|
+
# Matches
|
71
|
+
["AGF", "Komkommer"]
|
72
|
+
["AGF", "Komkommer", "Snack komkommer"]
|
73
|
+
["AGF", "Groente", "Komkommer", "Snack komkommer"]
|
74
|
+
|
75
|
+
# Does not match
|
76
|
+
["AGF"]
|
77
|
+
["AGF", "Snack komkommer"]
|
78
|
+
["AGF", "Groente", "Snack komkommer"]
|
79
|
+
```
|
80
|
+
|
81
|
+
### Wildcards `%`
|
82
|
+
```ruby
|
83
|
+
"groente% > %komkommer"
|
84
|
+
|
85
|
+
# Matches
|
86
|
+
["Groente", "Komkommer"]
|
87
|
+
["Groente & fruit", "Snack komkommer"]
|
88
|
+
```
|
89
|
+
|
90
|
+
### Negation `!`
|
91
|
+
```ruby
|
92
|
+
"!komkommer"
|
93
|
+
|
94
|
+
# Does not match
|
95
|
+
["Komkommer"]
|
96
|
+
```
|
97
|
+
|
98
|
+
### Lists `|`
|
99
|
+
```ruby
|
100
|
+
"veldsla | ijsbergsla | rucola"
|
101
|
+
|
102
|
+
# Matches
|
103
|
+
["Veldsla"]
|
104
|
+
["IJsbergsla"]
|
105
|
+
["Rucola"]
|
106
|
+
```
|
107
|
+
|
108
|
+
### Combining patterns
|
109
|
+
```ruby
|
110
|
+
"groente > seizoensgroente > %"
|
111
|
+
|
112
|
+
# Matches
|
113
|
+
["Groente", "Seizoensgroente", "Pastinaak"]
|
114
|
+
["Groente", "Seizoensgroente", "Vers", "Pastinaak"]
|
115
|
+
|
116
|
+
# Does not match
|
117
|
+
["Groente", "Seizoensgroente"]
|
118
|
+
```
|
119
|
+
|
120
|
+
```ruby
|
121
|
+
">> !komkommer%"
|
122
|
+
|
123
|
+
# Does not match
|
124
|
+
["Komkommer"]
|
125
|
+
["Komkommer & fruit"]
|
126
|
+
["AGF", "Komkommer"]
|
127
|
+
["AGF", "Komkommer & fruit"]
|
128
|
+
["AGF", "Groente", "Komkommer"]
|
129
|
+
```
|
130
|
+
|
131
|
+
```ruby
|
132
|
+
"nonfood >> ! babyvoeding | diervoeding"
|
133
|
+
|
134
|
+
# Matches
|
135
|
+
["Nonfood", "Baby", "Flessen"]
|
136
|
+
["Nonfood", "Huisdier", "Aanlijnriemen"]
|
137
|
+
|
138
|
+
# Does not match
|
139
|
+
["Nonfood", "Baby", "Babyvoeding"]
|
140
|
+
["Nonfood", "Huisdier", "Diervoeding"]
|
141
|
+
["Nonfood", "Babyvoeding"]
|
142
|
+
["Nonfood", "Diervoeding"]
|
143
|
+
["Nonfood"]
|
144
|
+
```
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2019 Stichting Questionmark
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
# Tiered Category Expressions
|
2
|
+
|
3
|
+
Work with TCEs in Ruby.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
```ruby
|
10
|
+
gem 'tiered_category_expressions'
|
11
|
+
```
|
12
|
+
|
13
|
+
And then execute:
|
14
|
+
|
15
|
+
$ bundle
|
16
|
+
|
17
|
+
Or install it yourself as:
|
18
|
+
|
19
|
+
$ gem install tiered_category_expressions
|
20
|
+
|
21
|
+
And require with:
|
22
|
+
|
23
|
+
```ruby
|
24
|
+
require 'tiered_category_expressions'
|
25
|
+
```
|
26
|
+
|
27
|
+
Or, if you don't want the `TCE()` alias in the global namespace, require with:
|
28
|
+
|
29
|
+
```ruby
|
30
|
+
require 'tiered_category_expressions/core'
|
31
|
+
```
|
32
|
+
|
33
|
+
## Documentation
|
34
|
+
|
35
|
+
- [Library documentation](https://www.rubydoc.info/gems/tiered_category_expressions)
|
36
|
+
- [TCE language reference](https://www.rubydoc.info/gems/tiered_category_expressions/file/LANGREF.md)
|
37
|
+
|
38
|
+
## Usage examples
|
39
|
+
|
40
|
+
```ruby
|
41
|
+
tce = TCE("groceries > nonfood | pharmacy >> !baby formula")
|
42
|
+
# => TieredCategoryExpressions::Expression[groceries > nonfood | pharmacy >> !baby formula]
|
43
|
+
|
44
|
+
tce.matches?(["Groceries", "Non-food", "Cleaning", "Soap"])
|
45
|
+
# => true
|
46
|
+
|
47
|
+
tce.matches?(["Groceries", "Non-food", "Baby", "Baby formula"])
|
48
|
+
# => false
|
49
|
+
|
50
|
+
tce.matches?(["Groceries", "Pharmacy", "Baby", "Pacifiers"])
|
51
|
+
# => true
|
52
|
+
|
53
|
+
tce.to_regexp
|
54
|
+
# => /^(groceries)>(nonfood|pharmacy)>(?!(.+>)*(babyformula)>).+>/i
|
55
|
+
|
56
|
+
tce.as_regexp
|
57
|
+
# => "^(groceries)>(nonfood|pharmacy)>(?!(.+>)*(babyformula)>).+>"
|
58
|
+
|
59
|
+
TCE("groceries > nonfood") > TCE("baby") > ">> pacifiers"
|
60
|
+
# => TieredCategoryExpressions::Expression[groceries > nonfood > baby >> pacifiers]
|
61
|
+
```
|
62
|
+
|
63
|
+
## Development
|
64
|
+
|
65
|
+
Run `rake spec` to run the tests. You can also run `bundle console` for an interactive prompt that will allow you to experiment.
|
66
|
+
|
67
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb` and run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
68
|
+
|
69
|
+
## Contributing
|
70
|
+
|
71
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/q-m/tiered_category_expressions.
|
72
|
+
|
73
|
+
## License
|
74
|
+
|
75
|
+
The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
|
data/Rakefile
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
require "bundler/gem_tasks"
require "rspec/core/rake_task"

# `rake spec` runs the RSpec suite; it is also the default task.
RSpec::Core::RakeTask.new(:spec)

task default: :spec

namespace :docs do
  desc "Build the YARD documentation"
  task :build do
    # The gem version is embedded in the title passed to yardoc.
    doc_title = "tiered_category_expressions (v#{TieredCategoryExpressions::VERSION})"
    print %x(bundle exec yardoc --title '#{doc_title}')
  end

  desc "View the YARD documentation in your browser"
  task :view do
    print %x(open ./doc/index.html)
  end
end

desc "Build and view the YARD documentation"
task docs: ["docs:build", "docs:view"]
|
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "tiered_category_expressions"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require "irb"
|
14
|
+
IRB.start
|
data/bin/setup
ADDED
@@ -0,0 +1,121 @@
|
|
1
|
+
require "tiered_category_expressions/parser"
|
2
|
+
require "tiered_category_expressions/transformer"
|
3
|
+
require "tiered_category_expressions/preprocessor"
|
4
|
+
require "tiered_category_expressions/util"
|
5
|
+
|
6
|
+
module TieredCategoryExpressions
  # Raised when a string cannot be parsed as a tiered category expression.
  class ParseError < StandardError; end

  class << self
    # Converts input to an {Expression}.
    #
    # An {Expression} is returned unchanged; any other input is converted with
    # +#to_s+ and parsed.
    #
    # @param expression [Expression, #to_s]
    # @return [Expression]
    # @raise [ParseError] Raises if TCE syntax is invalid
    #
    def Expression(expression)
      case expression
      when TieredCategoryExpressions::Expression then expression
      else TieredCategoryExpressions::Expression.parse(expression.to_s)
      end
    end
    alias TCE Expression
  end

  class Expression
    # @param str [String] Tiered category expression to parse
    # @return [Expression]
    # @raise [ParseError] Raises if TCE syntax is invalid
    #
    def self.parse(str)
      tree = TieredCategoryExpressions::Parser.new.parse(str)
      TieredCategoryExpressions::Transformer.new.apply(tree)
    rescue Parslet::ParseFailed => e
      # Report the column of the deepest (most specific) failure in the cause tree.
      deepest = Util.deepest_parse_failure_cause(e.parse_failure_cause)
      _, column = deepest.source.line_and_column(deepest.pos)
      raise ParseError, "unexpected input at character #{column}"
    end

    # @!visibility private
    def initialize(tiers)
      @tiers = tiers
    end

    # @!visibility private
    def inspect
      "TieredCategoryExpressions::Expression[#{self}]"
    end

    # @return [String] String representation of the expression
    def to_s
      @tiers.join(" ").sub(/^>(?!>)\s*/, "") # Initial ">" is implied (but ">>" is not)
    end

    # @return [String] Regexp representation of the expression as a string (does not include flags)
    def as_regexp
      "^#{@tiers.map(&:as_regexp).join}"
    end

    # @return [Regexp] Case-insensitive regexp representation of the expression
    def to_regexp
      /#{as_regexp}/i
    end

    # Matches the expression with the given category.
    #
    # @param category [Array<String>] Category to match
    # @return [Boolean]
    def matches?(category)
      to_regexp.match?(Preprocessor.call(category))
    end
    alias === matches?

    # Returns +true+ if both expressions are equal. Expressions are considered equal if they match the same categories.
    #
    # The other operand is converted through the module-level {TieredCategoryExpressions.TCE}, so the comparison
    # does not depend on the global +TCE()+ alias being loaded. Operands that cannot be parsed as a TCE are simply
    # not equal.
    #
    # @param other [Expression, #to_s]
    # @return [Boolean]
    #
    def ==(other)
      to_regexp == TieredCategoryExpressions.TCE(other).to_regexp
    rescue ParseError
      false
    end

    # Concatenates two expressions.
    #
    # @example
    #   TCE("foo") > "!bar" > TCE(">> baz")
    #   # => TieredCategoryExpressions::Expression[foo > !bar >> baz]
    #
    # @param other [Expression, #to_s]
    # @return [Expression]
    # @raise [ParseError] Raises if +other+ is not an {Expression} and cannot be parsed as one
    #
    def >(other)
      # Use the module-level converter so this works without the global TCE() alias.
      self.class.new(@tiers + TieredCategoryExpressions.TCE(other).tiers)
    end

    # Returns an SQL LIKE query that may be used to speed up certain SQL queries.
    #
    # SQL queries that involve matching some input against stored TCE regexps can be slow. Possibly, they can be
    # optimized by applying a much faster LIKE query first, which reduces the number of regexps to apply. The LIKE
    # query alone still yields false positives, so it must be combined with the corresponding regexp.
    #
    # For instance:
    #
    #   SELECT * FROM mappings WHERE 'foo>bar>baz>' LIKE tce_like_query AND 'foo>bar>baz>' ~ tce_regexp
    #
    # Can be much faster than:
    #
    #   SELECT * FROM mappings WHERE 'foo>bar>baz>' ~ tce_regexp
    #
    # Depending on the TCEs in the _mappings_ table.
    #
    # @return [String]
    #
    def as_sql_like_query
      @tiers.map(&:as_sql_like_query).join + "%"
    end

    protected

    # Exposed to other expressions only, so that {#>} can concatenate tiers.
    def tiers
      @tiers
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require "tiered_category_expressions/util"
|
2
|
+
|
3
|
+
module TieredCategoryExpressions
  # A single tier name as written in an expression, together with the
  # normalized form that is used for matching.
  class Name
    # @param name [String] Raw name; may contain spaces and "%" wildcards
    def initialize(name)
      # Display form: trimmed, runs of "%" and of whitespace collapsed.
      collapsed = name.strip.squeeze("%")
      @name = collapsed.gsub(/\s+/, " ")

      # Matching form: lowercased, transliterated, spaces removed.
      lowered = Util.transliterate(@name.downcase)
      @normalized_name = lowered.delete(" ")
    end

    # @return [String] The cleaned-up name as written
    def to_s
      @name
    end

    # @return [String] Regexp source in which every "%" wildcard becomes ".*"
    def as_regexp
      @normalized_name.gsub("%", ".*")
    end

    # @return [String] The normalized name; "%" is kept as the LIKE wildcard
    def as_sql_like_query
      @normalized_name
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module TieredCategoryExpressions
  # A list of alternative names for one tier (the "a | b | c" part of a TCE).
  class Namelist
    # Names are de-duplicated and ordered by their normalized (regexp) form rather than by how they were written.
    # Expression equality compares the generated regexps, so lists that differ only in order, letter case, accents
    # or repeated names must produce the same regexp.
    #
    # @param names [Array<#to_s, #as_regexp, #as_sql_like_query>]
    def initialize(names)
      @names = names.uniq(&:as_regexp).sort_by(&:as_regexp)
    end

    # @return [String] The names joined with " | "
    def to_s
      @names.join(" | ")
    end

    # @return [String] Regexp group with one alternative per name
    def as_regexp
      "(#{@names.map(&:as_regexp).join('|')})"
    end

    # @return [String] The single name's LIKE pattern, or "%" when there are several
    #   alternatives (LIKE cannot express alternation)
    def as_sql_like_query
      @names.size == 1 ? @names[0].as_sql_like_query : "%"
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require "parslet"
|
2
|
+
|
3
|
+
module TieredCategoryExpressions
  # Parslet grammar for tiered category expressions. The resulting tree uses
  # the keys :expression, :tier, :operator, :namelist and :name.
  class Parser < Parslet::Parser
    # Whitespace
    rule(:space) do
      match('\s').repeat(1)
    end
    rule(:space?) do
      space.maybe
    end

    # Negation marker, optionally followed by whitespace
    rule(:negator) do
      str("!") >> space?
    end

    # Tier separators: ">" (child) and ">>" (descendant), plus their negated forms
    rule(:sep) do
      str(">") >> space?
    end
    rule(:isep) do
      sep >> negator
    end
    rule(:sepsep) do
      str(">>") >> space?
    end
    rule(:isepsep) do
      sepsep >> negator
    end

    # Alternatives are tried in this order, so ">>" and the negated forms are
    # attempted before the plain ">".
    rule(:connector) do
      isepsep | sepsep | isep | sep
    end

    # Separator between alternative names within one tier
    rule(:namesep) do
      str("|") >> space?
    end

    # A name is one or more words; a word is made of alphanumerics and "%" wildcards
    rule(:word) do
      (match["[:alnum:]"] | str("%")).repeat(1) >> space?
    end
    rule(:name) do
      word.repeat(1).as(:name)
    end
    rule(:namelist) do
      (name >> (namesep >> name).repeat).as(:namelist)
    end

    # The first tier may omit its operator or start with a bare negator; every
    # following tier must be introduced by a connector.
    rule(:tce) do
      first_tier = ((connector | negator).as(:operator).maybe >> namelist).as(:tier)
      other_tiers = (connector.as(:operator) >> namelist).as(:tier).repeat
      space? >> (first_tier >> other_tiers).as(:expression)
    end
    root(:tce)
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module TieredCategoryExpressions
  # @internal TODO Deal with digits and dashes ("1-3 months" != "13months")
  module Preprocessor
    class << self
      # Converts a category to a string suitable for matching with TCE regexps.
      #
      # Every tier is sanitized and terminated with ">"; an empty category
      # yields an empty string.
      #
      # @example
      #   category = ["Non-food", "Cosmetics"]
      #   preprocessed_category = TieredCategoryExpressions::Preprocessor.call(category)
      #   TCE("nonfood > cosmetics").to_regexp.match?(preprocessed_category)
      #   # => true
      #
      # @param category [Array<String>]
      # @return [String]
      #
      def call(category)
        return "" if category.empty?

        category.map { |tier| "#{sanitize_name(tier)}>" }.join
      end

      private

      # Transliterates and lowercases the name, then drops every character
      # that is not an ASCII letter or digit (spaces included).
      def sanitize_name(str)
        lowered = Util.transliterate(str).downcase
        lowered.delete("^a-z0-9")
      end
    end
  end
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
module TieredCategoryExpressions
  # One tier of an expression: an operator paired with a namelist. The
  # concrete subclasses supply the regexp and SQL LIKE fragments.
  class Tier < Struct.new(:operator, :namelist)
    # @return [String] Operator and namelist separated by a space
    def to_s
      operator.to_s + " " + namelist.to_s
    end

    # @raise [NotImplementedError] Always; subclasses provide the implementation
    def as_regexp
      raise NotImplementedError, "subclasses of Tier must implement `#as_regexp`"
    end

    # @raise [NotImplementedError] Always; subclasses provide the implementation
    def as_sql_like_query
      raise NotImplementedError, "subclasses of Tier must implement `#as_sql_like_query`"
    end

    # Direct child (">").
    class Child < Tier
      def initialize(namelist)
        super(">", namelist)
      end

      def as_regexp
        namelist.as_regexp + ">"
      end

      def as_sql_like_query
        namelist.as_sql_like_query + ">"
      end
    end

    # Negated direct child ("> !").
    class IChild < Tier
      def initialize(namelist)
        super("> !", namelist)
      end

      def as_regexp
        "(?!" + namelist.as_regexp + ">).+>"
      end

      # The namelist is not used here; the fragment is always "%>".
      def as_sql_like_query
        "%>"
      end
    end

    # Descendant at any depth (">>").
    class Descendant < Tier
      def initialize(namelist)
        super(">>", namelist)
      end

      def as_regexp
        "(.+>)*" + namelist.as_regexp + ">"
      end

      def as_sql_like_query
        # Collapse repeated "%" that arise when the namelist's own pattern starts with one.
        ("%" + namelist.as_sql_like_query + ">").squeeze("%")
      end
    end

    # Negated descendant at any depth (">> !").
    class IDescendant < Tier
      def initialize(namelist)
        super(">> !", namelist)
      end

      def as_regexp
        "(?!(.+>)*" + namelist.as_regexp + ">).+>"
      end

      # The namelist is not used here; the fragment is always "%>".
      def as_sql_like_query
        "%>"
      end
    end
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require "parslet"
|
2
|
+
require "tiered_category_expressions/name"
|
3
|
+
require "tiered_category_expressions/namelist"
|
4
|
+
require "tiered_category_expressions/tiers"
|
5
|
+
require "tiered_category_expressions/expression"
|
6
|
+
|
7
|
+
module TieredCategoryExpressions
  # Turns the parse tree produced by {Parser} into {Name}, {Namelist}, {Tier} and {Expression} objects.
  class Transformer < Parslet::Transform
    rule(:name => simple(:name)) { Name.new(name.to_s) }

    rule(:tier => subtree(:tier)) do
      # The captured operator includes whatever whitespace the grammar consumed after ">" or "!". The grammar
      # accepts any whitespace (\s) there, so strip all of it, not just plain spaces; otherwise an operator
      # followed by a tab or newline would fall through to "no such operator".
      klass = case tier[:operator]&.to_s&.gsub(/\s+/, "")
              when ">", nil  then Tier::Child
              when ">!", "!" then Tier::IChild
              when ">>"      then Tier::Descendant
              when ">>!"     then Tier::IDescendant
              else raise "no such operator #{tier[:operator].inspect}"
              end

      # A single name arrives as one object, several names as an array; normalize to an array.
      namelist = Namelist.new([tier[:namelist]].flatten)
      klass.new(namelist)
    end

    rule(:expression => subtree(:tiers)) { Expression.new([tiers].flatten) }
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module TieredCategoryExpressions
|
2
|
+
module Util
|
3
|
+
extend self
|
4
|
+
|
5
|
+
def deepest_parse_failure_cause(cause)
|
6
|
+
if cause.children.any?
|
7
|
+
deepest_parse_failure_cause(cause.children.last)
|
8
|
+
else
|
9
|
+
cause
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
def transliterate(str)
|
14
|
+
str.tr(
|
15
|
+
"ÀÁÂÃÄÅàáâãäåĀāĂ㥹ÇçĆćĈĉĊċČčÐðĎďĐđÈÉÊËèéêëĒēĔĕĖėĘęĚěĜĝĞğĠġĢģĤĥĦħÌÍÎÏìíîïĨĩĪīĬĭĮįİıĴĵĶķĸĹĺĻļĽľĿŀŁłÑñŃńŅņŇňʼnŊŋÒÓÔÕÖØòóôõöøŌōŎŏŐőŔŕŖŗŘřŚśŜŝŞşŠšſŢţŤťŦŧÙÚÛÜùúûüŨũŪūŬŭŮůŰűŲųŴŵÝýÿŶŷŸŹźŻżŽž",
|
16
|
+
"AAAAAAaaaaaaAaAaAaCcCcCcCcCcDdDdDdEEEEeeeeEeEeEeEeEeGgGgGgGgHhHhIIIIiiiiIiIiIiIiIiJjKkkLlLlLlLlLlNnNnNnNnnNnOOOOOOooooooOoOoOoRrRrRrSsSsSsSssTtTtTtUUUUuuuuUuUuUuUuUuUuWwYyyYyYZzZzZz"
|
17
|
+
)
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
@@ -0,0 +1,7 @@
|
|
1
|
+
require "tiered_category_expressions/core"
|
2
|
+
include TieredCategoryExpressions::Core
|
3
|
+
|
4
|
+
# @!method TCE
|
5
|
+
# Alias of {TieredCategoryExpressions.TCE}. By default it is added to the global namespace. To avoid this, require
|
6
|
+
# this gem with:
|
7
|
+
# require 'tiered_category_expressions/core'
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# coding: utf-8
lib = File.expand_path('lib', __dir__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'tiered_category_expressions/version'

Gem::Specification.new do |spec|
  spec.name     = "tiered_category_expressions"
  spec.version  = TieredCategoryExpressions::VERSION
  spec.authors  = ["Sjoerd Andringa"]
  spec.email    = ["sjoerd@thequestionmark.org"]

  spec.summary  = "Tiered category expressions"
  spec.homepage = "https://github.com/q-m/tiered_category_expressions"
  spec.license  = "MIT"

  # Restrict pushes of this gem to RubyGems.org. Change 'allowed_push_host' to
  # push to a different single host, or delete this section to allow pushing
  # to any host.
  unless spec.respond_to?(:metadata)
    raise "RubyGems 2.0 or newer is required to protect against public gem pushes."
  end
  spec.metadata['allowed_push_host'] = "https://rubygems.org"

  # Package every file tracked by git except tests, specs and features.
  tracked = `git ls-files -z`.split("\x0")
  spec.files         = tracked.reject { |path| path.match(%r{^(test|spec|features)/}) }
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.13"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec", "~> 3.0"
  spec.add_development_dependency "yard"
  spec.add_development_dependency "pry"
  spec.add_development_dependency "pry-coolline"
  spec.add_runtime_dependency "parslet", "~> 1.8"
end
|
metadata
ADDED
@@ -0,0 +1,167 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: tiered_category_expressions
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Sjoerd Andringa
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2019-04-04 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.13'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.13'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '3.0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '3.0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: yard
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: pry
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: pry-coolline
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: parslet
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - "~>"
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '1.8'
|
104
|
+
type: :runtime
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - "~>"
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '1.8'
|
111
|
+
description:
|
112
|
+
email:
|
113
|
+
- sjoerd@thequestionmark.org
|
114
|
+
executables: []
|
115
|
+
extensions: []
|
116
|
+
extra_rdoc_files: []
|
117
|
+
files:
|
118
|
+
- ".gitignore"
|
119
|
+
- ".rspec"
|
120
|
+
- ".ruby-version"
|
121
|
+
- ".travis.yml"
|
122
|
+
- ".yardopts"
|
123
|
+
- Gemfile
|
124
|
+
- LANGREF.md
|
125
|
+
- LICENSE.txt
|
126
|
+
- README.md
|
127
|
+
- Rakefile
|
128
|
+
- bin/console
|
129
|
+
- bin/setup
|
130
|
+
- lib/tiered_category_expressions.rb
|
131
|
+
- lib/tiered_category_expressions/core.rb
|
132
|
+
- lib/tiered_category_expressions/expression.rb
|
133
|
+
- lib/tiered_category_expressions/name.rb
|
134
|
+
- lib/tiered_category_expressions/namelist.rb
|
135
|
+
- lib/tiered_category_expressions/parser.rb
|
136
|
+
- lib/tiered_category_expressions/preprocessor.rb
|
137
|
+
- lib/tiered_category_expressions/tiers.rb
|
138
|
+
- lib/tiered_category_expressions/transformer.rb
|
139
|
+
- lib/tiered_category_expressions/util.rb
|
140
|
+
- lib/tiered_category_expressions/version.rb
|
141
|
+
- tiered_category_expressions.gemspec
|
142
|
+
homepage: https://github.com/q-m/tiered_category_expressions
|
143
|
+
licenses:
|
144
|
+
- MIT
|
145
|
+
metadata:
|
146
|
+
allowed_push_host: https://rubygems.org
|
147
|
+
post_install_message:
|
148
|
+
rdoc_options: []
|
149
|
+
require_paths:
|
150
|
+
- lib
|
151
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
152
|
+
requirements:
|
153
|
+
- - ">="
|
154
|
+
- !ruby/object:Gem::Version
|
155
|
+
version: '0'
|
156
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
157
|
+
requirements:
|
158
|
+
- - ">="
|
159
|
+
- !ruby/object:Gem::Version
|
160
|
+
version: '0'
|
161
|
+
requirements: []
|
162
|
+
rubyforge_project:
|
163
|
+
rubygems_version: 2.6.14.3
|
164
|
+
signing_key:
|
165
|
+
specification_version: 4
|
166
|
+
summary: Tiered category expressions
|
167
|
+
test_files: []
|