tiered_category_expressions 0.1.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LANGREF.md +55 -1
- data/README.md +7 -5
- data/lib/tiered_category_expressions/expression.rb +24 -7
- data/lib/tiered_category_expressions/generator.rb +6 -2
- data/lib/tiered_category_expressions/parser.rb +9 -2
- data/lib/tiered_category_expressions/tail.rb +19 -0
- data/lib/tiered_category_expressions/tiers.rb +16 -3
- data/lib/tiered_category_expressions/transformer.rb +12 -12
- data/lib/tiered_category_expressions/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f0f9c42ee667e337270f971d95cc4dccb1592225
|
4
|
+
data.tar.gz: 665b8e13ae7e67176d111f0879a8705a5e598fb2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c31e3b5c6595d2be8abc3df8a13b29a7d5c0079aef2e7ed74d8a3b26e3475ca5a62106e9a4c1cf90a43052197d4ed1a1fb90abe8dd8cbc5d4806c5795b4e6afc
|
7
|
+
data.tar.gz: 0b7b3f3ffc5e589341f2a92568b565ee6ba5de499bec30a42d861c3366ff76c8b8ea1d671dd68e3c72d0d29d92926bd78d73a6e63a35928278994de933104576
|
data/LANGREF.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# TCE language reference
|
1
|
+
# TCE v1.1 language reference
|
2
2
|
|
3
3
|
## Introduction
|
4
4
|
|
@@ -14,6 +14,7 @@ TCEs follow the same structure. They consist of category tier names separated by
|
|
14
14
|
|
15
15
|
- `agf > groente`
|
16
16
|
- `agf > groente > komkommer`
|
17
|
+
- `agf > groente > komkommer.`
|
17
18
|
- `agf > groente > kom%`
|
18
19
|
- `agf > groente | fruit > komkommer`
|
19
20
|
- `agf > groente > !tomaat`
|
@@ -105,6 +106,41 @@ Note that TCEs are considered equal if they match the same categories. E.g. thes
|
|
105
106
|
["Rucola"]
|
106
107
|
```
|
107
108
|
|
109
|
+
### Explicit last tier(s) `.`
|
110
|
+
```ruby
|
111
|
+
"agf > groente > komkommer."
|
112
|
+
|
113
|
+
# Matches
|
114
|
+
["AGF", "Groente", "Komkommer"]
|
115
|
+
|
116
|
+
# Does not match
|
117
|
+
["AGF", "Groente", "Komkommer", "Snack komkommer"]
|
118
|
+
```
|
119
|
+
|
120
|
+
```ruby
|
121
|
+
"agf > groente. > komkommer"
|
122
|
+
|
123
|
+
# Matches
|
124
|
+
["AGF", "Groente"]
|
125
|
+
["AGF", "Groente", "Komkommer"]
|
126
|
+
["AGF", "Groente", "Komkommer", "Snack komkommer"]
|
127
|
+
|
128
|
+
# Does not match
|
129
|
+
["AGF", "Groente", "Tomaat"]
|
130
|
+
```
|
131
|
+
|
132
|
+
```ruby
|
133
|
+
"agf > groente. > komkommer."
|
134
|
+
|
135
|
+
# Matches
|
136
|
+
["AGF", "Groente"]
|
137
|
+
["AGF", "Groente", "Komkommer"]
|
138
|
+
|
139
|
+
# Does not match
|
140
|
+
["AGF", "Groente", "Tomaat"]
|
141
|
+
["AGF", "Groente", "Komkommer", "Snack komkommer"]
|
142
|
+
```
|
143
|
+
|
108
144
|
### Combining patterns
|
109
145
|
```ruby
|
110
146
|
"groente > seizoensgroente > %"
|
@@ -142,3 +178,21 @@ Note that TCEs are considered equal if they match the same categories. E.g. thes
|
|
142
178
|
["Nonfood", "Diervoeding"]
|
143
179
|
["Nonfood"]
|
144
180
|
```
|
181
|
+
|
182
|
+
```ruby
|
183
|
+
"voeding. >> %voeding."
|
184
|
+
|
185
|
+
# Matches
|
186
|
+
["Voeding"]
|
187
|
+
["Voeding", "Babyvoeding"]
|
188
|
+
["Voeding", "Diervoeding"]
|
189
|
+
["Voeding", "Baby", "Babyvoeding"]
|
190
|
+
["Voeding", "Dier", "Diervoeding"]
|
191
|
+
|
192
|
+
# Does not match
|
193
|
+
["Voeding", "AGF"]
|
194
|
+
["Voeding", "Babyvoeding", "Newborn"]
|
195
|
+
["Voeding", "Diervoeding", "Hond"]
|
196
|
+
["Voeding", "Baby", "Babyvoeding", "Newborn"]
|
197
|
+
["Voeding", "Dier", "Diervoeding", "Hond"]
|
198
|
+
```
|
data/README.md
CHANGED
@@ -1,6 +1,8 @@
|
|
1
|
+
<img src="https://travis-ci.com/q-m/tiered_category_expressions.svg?branch=master" align="right" />
|
2
|
+
|
1
3
|
# Tiered Category Expressions
|
2
4
|
|
3
|
-
Work with
|
5
|
+
Work with TCE v1.1 in Ruby.
|
4
6
|
|
5
7
|
## Installation
|
6
8
|
|
@@ -39,7 +41,7 @@ require 'tiered_category_expressions/core'
|
|
39
41
|
|
40
42
|
```ruby
|
41
43
|
tce = TCE("groceries > nonfood | pharmacy >> !baby formula")
|
42
|
-
# => TieredCategoryExpressions::Expression
|
44
|
+
# => TieredCategoryExpressions::Expression
|
43
45
|
|
44
46
|
tce.matches?(["Groceries", "Non-food", "Cleaning", "Soap"])
|
45
47
|
# => true
|
@@ -51,13 +53,13 @@ tce.matches?(["Groceries", "Pharmacy", "Baby", "Pacifiers"])
|
|
51
53
|
# => true
|
52
54
|
|
53
55
|
tce.to_regexp
|
54
|
-
# =>
|
56
|
+
# => Regexp
|
55
57
|
|
56
58
|
tce.as_regexp
|
57
|
-
# =>
|
59
|
+
# => String
|
58
60
|
|
59
61
|
TCE("groceries > nonfood") > TCE("baby") > ">> pacifiers"
|
60
|
-
# => TieredCategoryExpressions::Expression
|
62
|
+
# => TieredCategoryExpressions::Expression
|
61
63
|
```
|
62
64
|
|
63
65
|
## Development
|
@@ -36,9 +36,12 @@ module TieredCategoryExpressions
|
|
36
36
|
raise ParseError, "unexpected input at character #{column}"
|
37
37
|
end
|
38
38
|
|
39
|
+
# @param strict [Boolean] If +true+ is given then the object should not match categories with tiers that extend
|
40
|
+
# beyond those specified by the TCE. This is the case when the TCE ends with ".".
|
39
41
|
# @!visibility private
|
40
|
-
def initialize(tiers)
|
42
|
+
def initialize(tiers, strict:)
|
41
43
|
@tiers = tiers
|
44
|
+
@strict = !!strict
|
42
45
|
end
|
43
46
|
|
44
47
|
# @!visibility private
|
@@ -46,14 +49,18 @@ module TieredCategoryExpressions
|
|
46
49
|
"TieredCategoryExpressions::Expression[#{self}]"
|
47
50
|
end
|
48
51
|
|
52
|
+
# @param implied_root [Boolean] If +true+, a single leading ">" is omitted from the result (a leading ">>" is kept).
|
49
53
|
# @return [String] String representation of the expression
|
50
|
-
def to_s
|
51
|
-
@tiers.join(" ")
|
54
|
+
def to_s(implied_root: true)
|
55
|
+
str = @tiers.join(" ")
|
56
|
+
str << "." if @strict
|
57
|
+
str = str.sub(/^>(?!>)\s*/, "") if implied_root # Initial ">" is implied (but ">>" is not)
|
58
|
+
str
|
52
59
|
end
|
53
60
|
|
54
61
|
# @return [String] Regexp representation of the expression as a string (does not include flags)
|
55
62
|
def as_regexp
|
56
|
-
"^#{@tiers.map(&:as_regexp).join}"
|
63
|
+
"^#{@tiers.map(&:as_regexp).join}#{'$' if @strict}"
|
57
64
|
end
|
58
65
|
|
59
66
|
# @return [String] Regexp representation of the expression
|
@@ -65,6 +72,7 @@ module TieredCategoryExpressions
|
|
65
72
|
#
|
66
73
|
# @param category [Array<String>] Category to match
|
67
74
|
# @return [Boolean]
|
75
|
+
#
|
68
76
|
def matches?(category)
|
69
77
|
to_regexp.match?(Preprocessor.call(category))
|
70
78
|
end
|
@@ -89,14 +97,21 @@ module TieredCategoryExpressions
|
|
89
97
|
# @return [Expression]
|
90
98
|
#
|
91
99
|
def >(other)
|
92
|
-
|
100
|
+
TieredCategoryExpressions::TCE(to_s + TieredCategoryExpressions::TCE(other).to_s(implied_root: false))
|
101
|
+
end
|
102
|
+
|
103
|
+
# @return [Boolean] +true+ if the TCE object does not match categories with tiers that extend beyond those
|
104
|
+
# specified by the TCE. This is the case when the TCE ends with ".".
|
105
|
+
#
|
106
|
+
def strict?
|
107
|
+
@strict
|
93
108
|
end
|
94
109
|
|
95
110
|
# Returns an SQL LIKE query that may be used to speed up certain SQL queries.
|
96
111
|
#
|
97
112
|
# SQL queries that involve matching some input against stored TCE regexps can be slow. Possibly, they can be
|
98
113
|
# optimized by applying a much faster LIKE query first, which reduces the number of regexps to apply. The LIKE
|
99
|
-
# query alone still
|
114
|
+
# query alone can still yield false positives, so it must be combined with the corresponding regexp.
|
100
115
|
#
|
101
116
|
# For instance:
|
102
117
|
#
|
@@ -109,7 +124,9 @@ module TieredCategoryExpressions
|
|
109
124
|
# Depending on the TCEs in the _mappings_ table.
|
110
125
|
#
|
111
126
|
def as_sql_like_query
|
112
|
-
@tiers.map(&:as_sql_like_query).join
|
127
|
+
q = @tiers.map(&:as_sql_like_query).join
|
128
|
+
q += "%" unless @strict || q.end_with?("%")
|
129
|
+
q
|
113
130
|
end
|
114
131
|
|
115
132
|
protected
|
@@ -11,14 +11,18 @@ module TieredCategoryExpressions
|
|
11
11
|
# # => TieredCategoryExpressions::Expression[Nonfood > Baby > Baby formula]
|
12
12
|
#
|
13
13
|
# @param category [Array<String>]
|
14
|
+
# @param strict [Boolean] If +true+ is given then the resulting TCE will not match subcategories of the given
|
15
|
+
# category.
|
14
16
|
# @return [Expression, nil]
|
15
17
|
#
|
16
|
-
def call(category)
|
18
|
+
def call(category, strict: false)
|
17
19
|
return if category.empty?
|
18
20
|
|
19
21
|
tiers = category.map { |t| sanitize_name(t) or return nil }
|
22
|
+
expression = tiers.join(">")
|
23
|
+
expression << "." if strict
|
20
24
|
|
21
|
-
TieredCategoryExpressions::TCE(
|
25
|
+
TieredCategoryExpressions::TCE(expression)
|
22
26
|
end
|
23
27
|
|
24
28
|
private
|
@@ -17,9 +17,16 @@ module TieredCategoryExpressions
|
|
17
17
|
|
18
18
|
rule(:word) { (match["[:alnum:]"] | str("%")).repeat(1) >> space? }
|
19
19
|
rule(:name) { word.repeat(1).as(:name) }
|
20
|
-
rule(:namelist) { (name >> (namesep >> name).repeat).as(:namelist) }
|
20
|
+
rule(:namelist) { (name.repeat(1, 1) >> (namesep >> name).repeat).as(:namelist) }
|
21
|
+
|
22
|
+
rule(:stop) { str(".") >> space? }
|
23
|
+
|
24
|
+
rule(:tier1) { (connector | negator).maybe.as(:operator) >> namelist }
|
25
|
+
rule(:tier) { connector.as(:operator) >> namelist }
|
26
|
+
rule(:tiers) { tier.repeat >> (stop >> (tier.repeat(1, 1) >> tiers).as(:tail)).maybe }
|
27
|
+
|
28
|
+
rule(:tce) { space? >> (tier1.repeat(1, 1) >> tiers).as(:tiers) >> stop.maybe.as(:eoct) }
|
21
29
|
|
22
|
-
rule(:tce) { space? >> (((connector | negator).as(:operator).maybe >> namelist).as(:tier) >> (connector.as(:operator) >> namelist).as(:tier).repeat).as(:expression) }
|
23
30
|
root(:tce)
|
24
31
|
end
|
25
32
|
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module TieredCategoryExpressions
|
2
|
+
class Tail
|
3
|
+
def initialize(tiers)
|
4
|
+
@tiers = tiers
|
5
|
+
end
|
6
|
+
|
7
|
+
def to_s
|
8
|
+
". " + @tiers.join(" ")
|
9
|
+
end
|
10
|
+
|
11
|
+
def as_regexp
|
12
|
+
"($|(#{@tiers.map(&:as_regexp).join}))"
|
13
|
+
end
|
14
|
+
|
15
|
+
def as_sql_like_query
|
16
|
+
"%"
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
@@ -1,5 +1,18 @@
|
|
1
1
|
module TieredCategoryExpressions
|
2
2
|
class Tier < Struct.new(:operator, :namelist)
|
3
|
+
def self.build(operator, names)
|
4
|
+
klass = case operator&.to_s&.tr(" ", "")
|
5
|
+
when ">", nil then Child
|
6
|
+
when ">!", "!" then IChild
|
7
|
+
when ">>" then Descendant
|
8
|
+
when ">>!" then IDescendant
|
9
|
+
else raise "no such operator #{operator.inspect}"
|
10
|
+
end
|
11
|
+
|
12
|
+
namelist = Namelist.new(names)
|
13
|
+
klass.new(namelist)
|
14
|
+
end
|
15
|
+
|
3
16
|
def to_s
|
4
17
|
"#{operator} #{namelist}"
|
5
18
|
end
|
@@ -32,7 +45,7 @@ module TieredCategoryExpressions
|
|
32
45
|
end
|
33
46
|
|
34
47
|
def as_regexp
|
35
|
-
"(?!#{namelist.as_regexp}>)
|
48
|
+
"(?!#{namelist.as_regexp}>)[a-z0-9]+>"
|
36
49
|
end
|
37
50
|
|
38
51
|
def as_sql_like_query
|
@@ -46,7 +59,7 @@ module TieredCategoryExpressions
|
|
46
59
|
end
|
47
60
|
|
48
61
|
def as_regexp
|
49
|
-
"(
|
62
|
+
"([a-z0-9]+>)*#{namelist.as_regexp}>"
|
50
63
|
end
|
51
64
|
|
52
65
|
def as_sql_like_query
|
@@ -60,7 +73,7 @@ module TieredCategoryExpressions
|
|
60
73
|
end
|
61
74
|
|
62
75
|
def as_regexp
|
63
|
-
"(?!(
|
76
|
+
"(?!([a-z0-9]+>)*#{namelist.as_regexp}>)([a-z0-9]+>)+"
|
64
77
|
end
|
65
78
|
|
66
79
|
def as_sql_like_query
|
@@ -2,25 +2,25 @@ require "parslet"
|
|
2
2
|
require "tiered_category_expressions/name"
|
3
3
|
require "tiered_category_expressions/namelist"
|
4
4
|
require "tiered_category_expressions/tiers"
|
5
|
+
require "tiered_category_expressions/tail"
|
5
6
|
require "tiered_category_expressions/expression"
|
6
7
|
|
7
8
|
module TieredCategoryExpressions
|
8
9
|
class Transformer < Parslet::Transform
|
9
|
-
rule(:name => simple(:name))
|
10
|
+
rule(:name => simple(:name)) do
|
11
|
+
Name.new(name.to_s)
|
12
|
+
end
|
10
13
|
|
11
|
-
rule(:
|
12
|
-
|
13
|
-
|
14
|
-
when ">!", "!" then Tier::IChild
|
15
|
-
when ">>" then Tier::Descendant
|
16
|
-
when ">>!" then Tier::IDescendant
|
17
|
-
else raise "no such operator #{tier[:operator].inspect}"
|
18
|
-
end
|
14
|
+
rule(:operator => simple(:op), :namelist => sequence(:names)) do
|
15
|
+
Tier.build(op, names)
|
16
|
+
end
|
19
17
|
|
20
|
-
|
21
|
-
|
18
|
+
rule(:tail => sequence(:tiers)) do
|
19
|
+
Tail.new(tiers)
|
22
20
|
end
|
23
21
|
|
24
|
-
rule(:
|
22
|
+
rule(:tiers => sequence(:tiers), :eoct => simple(:eoct)) do
|
23
|
+
Expression.new(tiers, strict: !!eoct)
|
24
|
+
end
|
25
25
|
end
|
26
26
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tiered_category_expressions
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.3
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sjoerd Andringa
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-05-
|
11
|
+
date: 2019-05-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -122,6 +122,7 @@ files:
|
|
122
122
|
- lib/tiered_category_expressions/namelist.rb
|
123
123
|
- lib/tiered_category_expressions/parser.rb
|
124
124
|
- lib/tiered_category_expressions/preprocessor.rb
|
125
|
+
- lib/tiered_category_expressions/tail.rb
|
125
126
|
- lib/tiered_category_expressions/tiers.rb
|
126
127
|
- lib/tiered_category_expressions/transformer.rb
|
127
128
|
- lib/tiered_category_expressions/util.rb
|