tiered_category_expressions 0.1.3 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LANGREF.md +55 -1
- data/README.md +7 -5
- data/lib/tiered_category_expressions/expression.rb +24 -7
- data/lib/tiered_category_expressions/generator.rb +6 -2
- data/lib/tiered_category_expressions/parser.rb +9 -2
- data/lib/tiered_category_expressions/tail.rb +19 -0
- data/lib/tiered_category_expressions/tiers.rb +16 -3
- data/lib/tiered_category_expressions/transformer.rb +12 -12
- data/lib/tiered_category_expressions/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f0f9c42ee667e337270f971d95cc4dccb1592225
|
4
|
+
data.tar.gz: 665b8e13ae7e67176d111f0879a8705a5e598fb2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c31e3b5c6595d2be8abc3df8a13b29a7d5c0079aef2e7ed74d8a3b26e3475ca5a62106e9a4c1cf90a43052197d4ed1a1fb90abe8dd8cbc5d4806c5795b4e6afc
|
7
|
+
data.tar.gz: 0b7b3f3ffc5e589341f2a92568b565ee6ba5de499bec30a42d861c3366ff76c8b8ea1d671dd68e3c72d0d29d92926bd78d73a6e63a35928278994de933104576
|
data/LANGREF.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# TCE language reference
|
1
|
+
# TCE v1.1 language reference
|
2
2
|
|
3
3
|
## Introduction
|
4
4
|
|
@@ -14,6 +14,7 @@ TCEs follow the same structure. They consist of category tier names separated by
|
|
14
14
|
|
15
15
|
- `agf > groente`
|
16
16
|
- `agf > groente > komkommer`
|
17
|
+
- `agf > groente > komkommer.`
|
17
18
|
- `agf > groente > kom%`
|
18
19
|
- `agf > groente | fruit > komkommer`
|
19
20
|
- `agf > groente > !tomaat`
|
@@ -105,6 +106,41 @@ Note that TCEs are considered equal if they match the same categories. E.g. thes
|
|
105
106
|
["Rucola"]
|
106
107
|
```
|
107
108
|
|
109
|
+
### Explicit last tier(s) `.`
|
110
|
+
```ruby
|
111
|
+
"agf > groente > komkommer."
|
112
|
+
|
113
|
+
# Matches
|
114
|
+
["AGF", "Groente", "Komkommer"]
|
115
|
+
|
116
|
+
# Does not match
|
117
|
+
["AGF", "Groente", "Komkommer", "Snack komkommer"]
|
118
|
+
```
|
119
|
+
|
120
|
+
```ruby
|
121
|
+
"agf > groente. > komkommer"
|
122
|
+
|
123
|
+
# Matches
|
124
|
+
["AGF", "Groente"]
|
125
|
+
["AGF", "Groente", "Komkommer"]
|
126
|
+
["AGF", "Groente", "Komkommer", "Snack komkommer"]
|
127
|
+
|
128
|
+
# Does not match
|
129
|
+
["AGF", "Groente", "Tomaat"]
|
130
|
+
```
|
131
|
+
|
132
|
+
```ruby
|
133
|
+
"agf > groente. > komkommer."
|
134
|
+
|
135
|
+
# Matches
|
136
|
+
["AGF", "Groente"]
|
137
|
+
["AGF", "Groente", "Komkommer"]
|
138
|
+
|
139
|
+
# Does not match
|
140
|
+
["AGF", "Groente", "Tomaat"]
|
141
|
+
["AGF", "Groente", "Komkommer", "Snack komkommer"]
|
142
|
+
```
|
143
|
+
|
108
144
|
### Combining patterns
|
109
145
|
```ruby
|
110
146
|
"groente > seizoensgroente > %"
|
@@ -142,3 +178,21 @@ Note that TCEs are considered equal if they match the same categories. E.g. thes
|
|
142
178
|
["Nonfood", "Diervoeding"]
|
143
179
|
["Nonfood"]
|
144
180
|
```
|
181
|
+
|
182
|
+
```ruby
|
183
|
+
"voeding. >> %voeding."
|
184
|
+
|
185
|
+
# Matches
|
186
|
+
["Voeding"]
|
187
|
+
["Voeding", "Babyvoeding"]
|
188
|
+
["Voeding", "Diervoeding"]
|
189
|
+
["Voeding", "Baby", "Babyvoeding"]
|
190
|
+
["Voeding", "Dier", "Diervoeding"]
|
191
|
+
|
192
|
+
# Does not match
|
193
|
+
["Voeding", "AGF"]
|
194
|
+
["Voeding", "Babyvoeding", "Newborn"]
|
195
|
+
["Voeding", "Diervoeding", "Hond"]
|
196
|
+
["Voeding", "Baby", "Babyvoeding", "Newborn"]
|
197
|
+
["Voeding", "Dier", "Diervoeding", "Hond"]
|
198
|
+
```
|
data/README.md
CHANGED
@@ -1,6 +1,8 @@
|
|
1
|
+
<img src="https://travis-ci.com/q-m/tiered_category_expressions.svg?branch=master" align="right" />
|
2
|
+
|
1
3
|
# Tiered Category Expressions
|
2
4
|
|
3
|
-
Work with
|
5
|
+
Work with TCE v1.1 in Ruby.
|
4
6
|
|
5
7
|
## Installation
|
6
8
|
|
@@ -39,7 +41,7 @@ require 'tiered_category_expressions/core'
|
|
39
41
|
|
40
42
|
```ruby
|
41
43
|
tce = TCE("groceries > nonfood | pharmacy >> !baby formula")
|
42
|
-
# => TieredCategoryExpressions::Expression
|
44
|
+
# => TieredCategoryExpressions::Expression
|
43
45
|
|
44
46
|
tce.matches?(["Groceries", "Non-food", "Cleaning", "Soap"])
|
45
47
|
# => true
|
@@ -51,13 +53,13 @@ tce.matches?(["Groceries", "Pharmacy", "Baby", "Pacifiers"])
|
|
51
53
|
# => true
|
52
54
|
|
53
55
|
tce.to_regexp
|
54
|
-
# =>
|
56
|
+
# => Regexp
|
55
57
|
|
56
58
|
tce.as_regexp
|
57
|
-
# =>
|
59
|
+
# => String
|
58
60
|
|
59
61
|
TCE("groceries > nonfood") > TCE("baby") > ">> pacifiers"
|
60
|
-
# => TieredCategoryExpressions::Expression
|
62
|
+
# => TieredCategoryExpressions::Expression
|
61
63
|
```
|
62
64
|
|
63
65
|
## Development
|
@@ -36,9 +36,12 @@ module TieredCategoryExpressions
|
|
36
36
|
raise ParseError, "unexpected input at character #{column}"
|
37
37
|
end
|
38
38
|
|
39
|
+
# @param strict [Boolean] If +true+ is given then the object should not match categories with tiers that extend
|
40
|
+
# beyond those specified by the TCE. This is the case when the TCE ends with ".".
|
39
41
|
# @!visibility private
|
40
|
-
def initialize(tiers)
|
42
|
+
def initialize(tiers, strict:)
|
41
43
|
@tiers = tiers
|
44
|
+
@strict = !!strict
|
42
45
|
end
|
43
46
|
|
44
47
|
# @!visibility private
|
@@ -46,14 +49,18 @@ module TieredCategoryExpressions
|
|
46
49
|
"TieredCategoryExpressions::Expression[#{self}]"
|
47
50
|
end
|
48
51
|
|
52
|
+
# @param implied_root [Boolean] If +true+, a single leading ">" is omitted because it is implied (a leading ">>" is kept).
|
49
53
|
# @return [String] String representation of the expression
|
50
|
-
def to_s
|
51
|
-
@tiers.join(" ")
|
54
|
+
def to_s(implied_root: true)
|
55
|
+
str = @tiers.join(" ")
|
56
|
+
str << "." if @strict
|
57
|
+
str = str.sub(/^>(?!>)\s*/, "") if implied_root # Initial ">" is implied (but ">>" is not)
|
58
|
+
str
|
52
59
|
end
|
53
60
|
|
54
61
|
# @return [String] Regexp representation of the expression as a string (does not include flags)
|
55
62
|
def as_regexp
|
56
|
-
"^#{@tiers.map(&:as_regexp).join}"
|
63
|
+
"^#{@tiers.map(&:as_regexp).join}#{'$' if @strict}"
|
57
64
|
end
|
58
65
|
|
59
66
|
# @return [String] Regexp representation of the expression
|
@@ -65,6 +72,7 @@ module TieredCategoryExpressions
|
|
65
72
|
#
|
66
73
|
# @param category [Array<String>] Category to match
|
67
74
|
# @return [Boolean]
|
75
|
+
#
|
68
76
|
def matches?(category)
|
69
77
|
to_regexp.match?(Preprocessor.call(category))
|
70
78
|
end
|
@@ -89,14 +97,21 @@ module TieredCategoryExpressions
|
|
89
97
|
# @return [Expression]
|
90
98
|
#
|
91
99
|
def >(other)
|
92
|
-
|
100
|
+
TieredCategoryExpressions::TCE(to_s + TieredCategoryExpressions::TCE(other).to_s(implied_root: false))
|
101
|
+
end
|
102
|
+
|
103
|
+
# @return [Boolean] +true+ if the TCE object does not match categories with tiers that extend beyond those
|
104
|
+
# specified by the TCE. This is the case when the TCE ends with ".".
|
105
|
+
#
|
106
|
+
def strict?
|
107
|
+
@strict
|
93
108
|
end
|
94
109
|
|
95
110
|
# Returns an SQL LIKE query that may be used to speed up certain SQL queries.
|
96
111
|
#
|
97
112
|
# SQL queries that involve matching some input against stored TCE regexps can be slow. Possibly, they can be
|
98
113
|
# optimized by applying a much faster LIKE query first, which reduces the number of regexps to apply. The LIKE
|
99
|
-
# query alone still
|
114
|
+
# query alone can still yield false positives, so it must be combined with the corresponding regexp.
|
100
115
|
#
|
101
116
|
# For instance:
|
102
117
|
#
|
@@ -109,7 +124,9 @@ module TieredCategoryExpressions
|
|
109
124
|
# Depending on the TCEs in the _mappings_ table.
|
110
125
|
#
|
111
126
|
def as_sql_like_query
|
112
|
-
@tiers.map(&:as_sql_like_query).join
|
127
|
+
q = @tiers.map(&:as_sql_like_query).join
|
128
|
+
q += "%" unless @strict || q.end_with?("%")
|
129
|
+
q
|
113
130
|
end
|
114
131
|
|
115
132
|
protected
|
@@ -11,14 +11,18 @@ module TieredCategoryExpressions
|
|
11
11
|
# # => TieredCategoryExpressions::Expression[Nonfood > Baby > Baby formula]
|
12
12
|
#
|
13
13
|
# @param category [Array<String>]
|
14
|
+
# @param strict [Boolean] If +true+ is given then the resulting TCE will not match subcategories of the given
|
15
|
+
# category.
|
14
16
|
# @return [Expression, nil]
|
15
17
|
#
|
16
|
-
def call(category)
|
18
|
+
def call(category, strict: false)
|
17
19
|
return if category.empty?
|
18
20
|
|
19
21
|
tiers = category.map { |t| sanitize_name(t) or return nil }
|
22
|
+
expression = tiers.join(">")
|
23
|
+
expression << "." if strict
|
20
24
|
|
21
|
-
TieredCategoryExpressions::TCE(
|
25
|
+
TieredCategoryExpressions::TCE(expression)
|
22
26
|
end
|
23
27
|
|
24
28
|
private
|
@@ -17,9 +17,16 @@ module TieredCategoryExpressions
|
|
17
17
|
|
18
18
|
rule(:word) { (match["[:alnum:]"] | str("%")).repeat(1) >> space? }
|
19
19
|
rule(:name) { word.repeat(1).as(:name) }
|
20
|
-
rule(:namelist) { (name >> (namesep >> name).repeat).as(:namelist) }
|
20
|
+
rule(:namelist) { (name.repeat(1, 1) >> (namesep >> name).repeat).as(:namelist) }
|
21
|
+
|
22
|
+
rule(:stop) { str(".") >> space? }
|
23
|
+
|
24
|
+
rule(:tier1) { (connector | negator).maybe.as(:operator) >> namelist }
|
25
|
+
rule(:tier) { connector.as(:operator) >> namelist }
|
26
|
+
rule(:tiers) { tier.repeat >> (stop >> (tier.repeat(1, 1) >> tiers).as(:tail)).maybe }
|
27
|
+
|
28
|
+
rule(:tce) { space? >> (tier1.repeat(1, 1) >> tiers).as(:tiers) >> stop.maybe.as(:eoct) }
|
21
29
|
|
22
|
-
rule(:tce) { space? >> (((connector | negator).as(:operator).maybe >> namelist).as(:tier) >> (connector.as(:operator) >> namelist).as(:tier).repeat).as(:expression) }
|
23
30
|
root(:tce)
|
24
31
|
end
|
25
32
|
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module TieredCategoryExpressions
|
2
|
+
class Tail
|
3
|
+
def initialize(tiers)
|
4
|
+
@tiers = tiers
|
5
|
+
end
|
6
|
+
|
7
|
+
def to_s
|
8
|
+
". " + @tiers.join(" ")
|
9
|
+
end
|
10
|
+
|
11
|
+
def as_regexp
|
12
|
+
"($|(#{@tiers.map(&:as_regexp).join}))"
|
13
|
+
end
|
14
|
+
|
15
|
+
def as_sql_like_query
|
16
|
+
"%"
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
@@ -1,5 +1,18 @@
|
|
1
1
|
module TieredCategoryExpressions
|
2
2
|
class Tier < Struct.new(:operator, :namelist)
|
3
|
+
def self.build(operator, names)
|
4
|
+
klass = case operator&.to_s&.tr(" ", "")
|
5
|
+
when ">", nil then Child
|
6
|
+
when ">!", "!" then IChild
|
7
|
+
when ">>" then Descendant
|
8
|
+
when ">>!" then IDescendant
|
9
|
+
else raise "no such operator #{operator.inspect}"
|
10
|
+
end
|
11
|
+
|
12
|
+
namelist = Namelist.new(names)
|
13
|
+
klass.new(namelist)
|
14
|
+
end
|
15
|
+
|
3
16
|
def to_s
|
4
17
|
"#{operator} #{namelist}"
|
5
18
|
end
|
@@ -32,7 +45,7 @@ module TieredCategoryExpressions
|
|
32
45
|
end
|
33
46
|
|
34
47
|
def as_regexp
|
35
|
-
"(?!#{namelist.as_regexp}>)
|
48
|
+
"(?!#{namelist.as_regexp}>)[a-z0-9]+>"
|
36
49
|
end
|
37
50
|
|
38
51
|
def as_sql_like_query
|
@@ -46,7 +59,7 @@ module TieredCategoryExpressions
|
|
46
59
|
end
|
47
60
|
|
48
61
|
def as_regexp
|
49
|
-
"(
|
62
|
+
"([a-z0-9]+>)*#{namelist.as_regexp}>"
|
50
63
|
end
|
51
64
|
|
52
65
|
def as_sql_like_query
|
@@ -60,7 +73,7 @@ module TieredCategoryExpressions
|
|
60
73
|
end
|
61
74
|
|
62
75
|
def as_regexp
|
63
|
-
"(?!(
|
76
|
+
"(?!([a-z0-9]+>)*#{namelist.as_regexp}>)([a-z0-9]+>)+"
|
64
77
|
end
|
65
78
|
|
66
79
|
def as_sql_like_query
|
@@ -2,25 +2,25 @@ require "parslet"
|
|
2
2
|
require "tiered_category_expressions/name"
|
3
3
|
require "tiered_category_expressions/namelist"
|
4
4
|
require "tiered_category_expressions/tiers"
|
5
|
+
require "tiered_category_expressions/tail"
|
5
6
|
require "tiered_category_expressions/expression"
|
6
7
|
|
7
8
|
module TieredCategoryExpressions
|
8
9
|
class Transformer < Parslet::Transform
|
9
|
-
rule(:name => simple(:name))
|
10
|
+
rule(:name => simple(:name)) do
|
11
|
+
Name.new(name.to_s)
|
12
|
+
end
|
10
13
|
|
11
|
-
rule(:
|
12
|
-
|
13
|
-
|
14
|
-
when ">!", "!" then Tier::IChild
|
15
|
-
when ">>" then Tier::Descendant
|
16
|
-
when ">>!" then Tier::IDescendant
|
17
|
-
else raise "no such operator #{tier[:operator].inspect}"
|
18
|
-
end
|
14
|
+
rule(:operator => simple(:op), :namelist => sequence(:names)) do
|
15
|
+
Tier.build(op, names)
|
16
|
+
end
|
19
17
|
|
20
|
-
|
21
|
-
|
18
|
+
rule(:tail => sequence(:tiers)) do
|
19
|
+
Tail.new(tiers)
|
22
20
|
end
|
23
21
|
|
24
|
-
rule(:
|
22
|
+
rule(:tiers => sequence(:tiers), :eoct => simple(:eoct)) do
|
23
|
+
Expression.new(tiers, strict: !!eoct)
|
24
|
+
end
|
25
25
|
end
|
26
26
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tiered_category_expressions
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sjoerd Andringa
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-05-
|
11
|
+
date: 2019-05-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -122,6 +122,7 @@ files:
|
|
122
122
|
- lib/tiered_category_expressions/namelist.rb
|
123
123
|
- lib/tiered_category_expressions/parser.rb
|
124
124
|
- lib/tiered_category_expressions/preprocessor.rb
|
125
|
+
- lib/tiered_category_expressions/tail.rb
|
125
126
|
- lib/tiered_category_expressions/tiers.rb
|
126
127
|
- lib/tiered_category_expressions/transformer.rb
|
127
128
|
- lib/tiered_category_expressions/util.rb
|