tiered_category_expressions 0.1.3 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5501de321cdb8513c0913ceb5163822615701500
4
- data.tar.gz: 24748da10d31e3f935e315582cbc98cc6c6c069f
3
+ metadata.gz: f0f9c42ee667e337270f971d95cc4dccb1592225
4
+ data.tar.gz: 665b8e13ae7e67176d111f0879a8705a5e598fb2
5
5
  SHA512:
6
- metadata.gz: 6829ffcf02a4b9bc5fb9d0dd1ef0c80161fed35220eaee6689c867b817261bfcd2d2fcb8f4329a02fc4df887d706e42ccc58cadfb0dd55dda75c60450ce9cc5d
7
- data.tar.gz: 49fc8d928beabb5d4a165aa5d9f4a0f7ff11156e5e11006667ed83cec416dbd5d0386f6c5785beb7e3cbbc5f9c5b49740da2349a25345f435fa0eed309d90625
6
+ metadata.gz: c31e3b5c6595d2be8abc3df8a13b29a7d5c0079aef2e7ed74d8a3b26e3475ca5a62106e9a4c1cf90a43052197d4ed1a1fb90abe8dd8cbc5d4806c5795b4e6afc
7
+ data.tar.gz: 0b7b3f3ffc5e589341f2a92568b565ee6ba5de499bec30a42d861c3366ff76c8b8ea1d671dd68e3c72d0d29d92926bd78d73a6e63a35928278994de933104576
data/LANGREF.md CHANGED
@@ -1,4 +1,4 @@
1
- # TCE language reference
1
+ # TCE v1.1 language reference
2
2
 
3
3
  ## Introduction
4
4
 
@@ -14,6 +14,7 @@ TCEs follow the same structure. They consist of category tier names separated by
14
14
 
15
15
  - `agf > groente`
16
16
  - `agf > groente > komkommer`
17
+ - `agf > groente > komkommer.`
17
18
  - `agf > groente > kom%`
18
19
  - `agf > groente | fruit > komkommer`
19
20
  - `agf > groente > !tomaat`
@@ -105,6 +106,41 @@ Note that TCEs are considered equal if they match the same categories. E.g. thes
105
106
  ["Rucola"]
106
107
  ```
107
108
 
109
+ ### Explicit last tier(s) `.`
110
+ ```ruby
111
+ "agf > groente > komkommer."
112
+
113
+ # Matches
114
+ ["AGF", "Groente", "Komkommer"]
115
+
116
+ # Does not match
117
+ ["AGF", "Groente", "Komkommer", "Snack komkommer"]
118
+ ```
119
+
120
+ ```ruby
121
+ "agf > groente. > komkommer"
122
+
123
+ # Matches
124
+ ["AGF", "Groente"]
125
+ ["AGF", "Groente", "Komkommer"]
126
+ ["AGF", "Groente", "Komkommer", "Snack komkommer"]
127
+
128
+ # Does not match
129
+ ["AGF", "Groente", "Tomaat"]
130
+ ```
131
+
132
+ ```ruby
133
+ "agf > groente. > komkommer."
134
+
135
+ # Matches
136
+ ["AGF", "Groente"]
137
+ ["AGF", "Groente", "Komkommer"]
138
+
139
+ # Does not match
140
+ ["AGF", "Groente", "Tomaat"]
141
+ ["AGF", "Groente", "Komkommer", "Snack komkommer"]
142
+ ```
143
+
108
144
  ### Combining patterns
109
145
  ```ruby
110
146
  "groente > seizoensgroente > %"
@@ -142,3 +178,21 @@ Note that TCEs are considered equal if they match the same categories. E.g. thes
142
178
  ["Nonfood", "Diervoeding"]
143
179
  ["Nonfood"]
144
180
  ```
181
+
182
+ ```ruby
183
+ "voeding. >> %voeding."
184
+
185
+ # Matches
186
+ ["Voeding"]
187
+ ["Voeding", "Babyvoeding"]
188
+ ["Voeding", "Diervoeding"]
189
+ ["Voeding", "Baby", "Babyvoeding"]
190
+ ["Voeding", "Dier", "Diervoeding"]
191
+
192
+ # Does not match
193
+ ["Voeding", "AGF"]
194
+ ["Voeding", "Babyvoeding", "Newborn"]
195
+ ["Voeding", "Diervoeding", "Hond"]
196
+ ["Voeding", "Baby", "Babyvoeding", "Newborn"]
197
+ ["Voeding", "Dier", "Diervoeding", "Hond"]
198
+ ```
data/README.md CHANGED
@@ -1,6 +1,8 @@
1
+ <img src="https://travis-ci.com/q-m/tiered_category_expressions.svg?branch=master" align="right" />
2
+
1
3
  # Tiered Category Expressions
2
4
 
3
- Work with TCEs in Ruby.
5
+ Work with TCE v1.1 in Ruby.
4
6
 
5
7
  ## Installation
6
8
 
@@ -39,7 +41,7 @@ require 'tiered_category_expressions/core'
39
41
 
40
42
  ```ruby
41
43
  tce = TCE("groceries > nonfood | pharmacy >> !baby formula")
42
- # => TieredCategoryExpressions::Expression[groceries > nonfood | pharmacy >> !baby formula]
44
+ # => TieredCategoryExpressions::Expression
43
45
 
44
46
  tce.matches?(["Groceries", "Non-food", "Cleaning", "Soap"])
45
47
  # => true
@@ -51,13 +53,13 @@ tce.matches?(["Groceries", "Pharmacy", "Baby", "Pacifiers"])
51
53
  # => true
52
54
 
53
55
  tce.to_regexp
54
- # => /^(groceries)>(nonfood|pharmacy)>(?!(.+>)*(babyformula)>).+>/i
56
+ # => Regexp
55
57
 
56
58
  tce.as_regexp
57
- # => "^(groceries)>(nonfood|pharmacy)>(?!(.+>)*(babyformula)>).+>"
59
+ # => String
58
60
 
59
61
  TCE("groceries > nonfood") > TCE("baby") > ">> pacifiers"
60
- # => TieredCategoryExpressions::Expression[groceries > nonfood > baby >> pacifiers]
62
+ # => TieredCategoryExpressions::Expression
61
63
  ```
62
64
 
63
65
  ## Development
@@ -36,9 +36,12 @@ module TieredCategoryExpressions
36
36
  raise ParseError, "unexpected input at character #{column}"
37
37
  end
38
38
 
39
+ # @param strict [Boolean] If +true+ is given then the object should not match categories with tiers that extend
40
+ # beyond those specified by the TCE. This is the case when the TCE ends with ".".
39
41
  # @!visibility private
40
- def initialize(tiers)
42
+ def initialize(tiers, strict:)
41
43
  @tiers = tiers
44
+ @strict = !!strict
42
45
  end
43
46
 
44
47
  # @!visibility private
@@ -46,14 +49,18 @@ module TieredCategoryExpressions
46
49
  "TieredCategoryExpressions::Expression[#{self}]"
47
50
  end
48
51
 
52
+ # @param implied_root [Boolean] If +true+ no leading ">" is included.
49
53
  # @return [String] String representation of the expression
50
- def to_s
51
- @tiers.join(" ").sub(/^>(?!>)\s*/, "") # Initial ">" is implied (but ">>" is not)
54
+ def to_s(implied_root: true)
55
+ str = @tiers.join(" ")
56
+ str << "." if @strict
57
+ str = str.sub(/^>(?!>)\s*/, "") if implied_root # Initial ">" is implied (but ">>" is not)
58
+ str
52
59
  end
53
60
 
54
61
  # @return [String] Regexp representation of the expression as a string (does not include flags)
55
62
  def as_regexp
56
- "^#{@tiers.map(&:as_regexp).join}"
63
+ "^#{@tiers.map(&:as_regexp).join}#{'$' if @strict}"
57
64
  end
58
65
 
59
66
  # @return [Regexp] Regexp representation of the expression
@@ -65,6 +72,7 @@ module TieredCategoryExpressions
65
72
  #
66
73
  # @param category [Array<String>] Category to match
67
74
  # @return [Boolean]
75
+ #
68
76
  def matches?(category)
69
77
  to_regexp.match?(Preprocessor.call(category))
70
78
  end
@@ -89,14 +97,21 @@ module TieredCategoryExpressions
89
97
  # @return [Expression]
90
98
  #
91
99
  def >(other)
92
- self.class.new(@tiers + TieredCategoryExpressions::TCE(other).tiers)
100
+ TieredCategoryExpressions::TCE(to_s + TieredCategoryExpressions::TCE(other).to_s(implied_root: false))
101
+ end
102
+
103
+ # @return [Boolean] +true+ if the TCE object does not match categories with tiers that extend beyond those
104
+ # specified by the TCE. This is the case when the TCE ends with ".".
105
+ #
106
+ def strict?
107
+ @strict
93
108
  end
94
109
 
95
110
  # Returns an SQL LIKE query that may be used to speed up certain SQL queries.
96
111
  #
97
112
  # SQL queries that involve matching some input against stored TCE regexps can be slow. Possibly, they can be
98
113
  # optimized by applying a much faster LIKE query first, which reduces the number of regexps to apply. The LIKE
99
- # query alone still yields false positives, so it must be combined with the corresponding regexp.
114
+ # query alone can still yield false positives, so it must be combined with the corresponding regexp.
100
115
  #
101
116
  # For instance:
102
117
  #
@@ -109,7 +124,9 @@ module TieredCategoryExpressions
109
124
  # Depending on the TCEs in the _mappings_ table.
110
125
  #
111
126
  def as_sql_like_query
112
- @tiers.map(&:as_sql_like_query).join + "%"
127
+ q = @tiers.map(&:as_sql_like_query).join
128
+ q += "%" unless @strict || q.end_with?("%")
129
+ q
113
130
  end
114
131
 
115
132
  protected
@@ -11,14 +11,18 @@ module TieredCategoryExpressions
11
11
  # # => TieredCategoryExpressions::Expression[Nonfood > Baby > Baby formula]
12
12
  #
13
13
  # @param category [Array<String>]
14
+ # @param strict [Boolean] If +true+ is given then the resulting TCE will not match subcategories of the given
15
+ # category.
14
16
  # @return [Expression, nil]
15
17
  #
16
- def call(category)
18
+ def call(category, strict: false)
17
19
  return if category.empty?
18
20
 
19
21
  tiers = category.map { |t| sanitize_name(t) or return nil }
22
+ expression = tiers.join(">")
23
+ expression << "." if strict
20
24
 
21
- TieredCategoryExpressions::TCE(tiers.join(" > "))
25
+ TieredCategoryExpressions::TCE(expression)
22
26
  end
23
27
 
24
28
  private
@@ -17,9 +17,16 @@ module TieredCategoryExpressions
17
17
 
18
18
  rule(:word) { (match["[:alnum:]"] | str("%")).repeat(1) >> space? }
19
19
  rule(:name) { word.repeat(1).as(:name) }
20
- rule(:namelist) { (name >> (namesep >> name).repeat).as(:namelist) }
20
+ rule(:namelist) { (name.repeat(1, 1) >> (namesep >> name).repeat).as(:namelist) }
21
+
22
+ rule(:stop) { str(".") >> space? }
23
+
24
+ rule(:tier1) { (connector | negator).maybe.as(:operator) >> namelist }
25
+ rule(:tier) { connector.as(:operator) >> namelist }
26
+ rule(:tiers) { tier.repeat >> (stop >> (tier.repeat(1, 1) >> tiers).as(:tail)).maybe }
27
+
28
+ rule(:tce) { space? >> (tier1.repeat(1, 1) >> tiers).as(:tiers) >> stop.maybe.as(:eoct) }
21
29
 
22
- rule(:tce) { space? >> (((connector | negator).as(:operator).maybe >> namelist).as(:tier) >> (connector.as(:operator) >> namelist).as(:tier).repeat).as(:expression) }
23
30
  root(:tce)
24
31
  end
25
32
  end
@@ -0,0 +1,19 @@
1
+ module TieredCategoryExpressions
2
+ class Tail
3
+ def initialize(tiers)
4
+ @tiers = tiers
5
+ end
6
+
7
+ def to_s
8
+ ". " + @tiers.join(" ")
9
+ end
10
+
11
+ def as_regexp
12
+ "($|(#{@tiers.map(&:as_regexp).join}))"
13
+ end
14
+
15
+ def as_sql_like_query
16
+ "%"
17
+ end
18
+ end
19
+ end
@@ -1,5 +1,18 @@
1
1
  module TieredCategoryExpressions
2
2
  class Tier < Struct.new(:operator, :namelist)
3
+ def self.build(operator, names)
4
+ klass = case operator&.to_s&.tr(" ", "")
5
+ when ">", nil then Child
6
+ when ">!", "!" then IChild
7
+ when ">>" then Descendant
8
+ when ">>!" then IDescendant
9
+ else raise "no such operator #{operator.inspect}"
10
+ end
11
+
12
+ namelist = Namelist.new(names)
13
+ klass.new(namelist)
14
+ end
15
+
3
16
  def to_s
4
17
  "#{operator} #{namelist}"
5
18
  end
@@ -32,7 +45,7 @@ module TieredCategoryExpressions
32
45
  end
33
46
 
34
47
  def as_regexp
35
- "(?!#{namelist.as_regexp}>).+>"
48
+ "(?!#{namelist.as_regexp}>)[a-z0-9]+>"
36
49
  end
37
50
 
38
51
  def as_sql_like_query
@@ -46,7 +59,7 @@ module TieredCategoryExpressions
46
59
  end
47
60
 
48
61
  def as_regexp
49
- "(.+>)*#{namelist.as_regexp}>"
62
+ "([a-z0-9]+>)*#{namelist.as_regexp}>"
50
63
  end
51
64
 
52
65
  def as_sql_like_query
@@ -60,7 +73,7 @@ module TieredCategoryExpressions
60
73
  end
61
74
 
62
75
  def as_regexp
63
- "(?!(.+>)*#{namelist.as_regexp}>).+>"
76
+ "(?!([a-z0-9]+>)*#{namelist.as_regexp}>)([a-z0-9]+>)+"
64
77
  end
65
78
 
66
79
  def as_sql_like_query
@@ -2,25 +2,25 @@ require "parslet"
2
2
  require "tiered_category_expressions/name"
3
3
  require "tiered_category_expressions/namelist"
4
4
  require "tiered_category_expressions/tiers"
5
+ require "tiered_category_expressions/tail"
5
6
  require "tiered_category_expressions/expression"
6
7
 
7
8
  module TieredCategoryExpressions
8
9
  class Transformer < Parslet::Transform
9
- rule(:name => simple(:name)) { Name.new(name.to_s) }
10
+ rule(:name => simple(:name)) do
11
+ Name.new(name.to_s)
12
+ end
10
13
 
11
- rule(:tier => subtree(:tier)) do
12
- klass = case tier[:operator]&.to_s&.tr(" ", "")
13
- when ">", nil then Tier::Child
14
- when ">!", "!" then Tier::IChild
15
- when ">>" then Tier::Descendant
16
- when ">>!" then Tier::IDescendant
17
- else raise "no such operator #{tier[:operator].inspect}"
18
- end
14
+ rule(:operator => simple(:op), :namelist => sequence(:names)) do
15
+ Tier.build(op, names)
16
+ end
19
17
 
20
- namelist = Namelist.new([tier[:namelist]].flatten)
21
- klass.new(namelist)
18
+ rule(:tail => sequence(:tiers)) do
19
+ Tail.new(tiers)
22
20
  end
23
21
 
24
- rule(:expression => subtree(:tiers)) { Expression.new([tiers].flatten) }
22
+ rule(:tiers => sequence(:tiers), :eoct => simple(:eoct)) do
23
+ Expression.new(tiers, strict: !!eoct)
24
+ end
25
25
  end
26
26
  end
@@ -1,3 +1,3 @@
1
1
  module TieredCategoryExpressions
2
- VERSION = "0.1.3"
2
+ VERSION = "0.2.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tiered_category_expressions
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sjoerd Andringa
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-05-03 00:00:00.000000000 Z
11
+ date: 2019-05-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -122,6 +122,7 @@ files:
122
122
  - lib/tiered_category_expressions/namelist.rb
123
123
  - lib/tiered_category_expressions/parser.rb
124
124
  - lib/tiered_category_expressions/preprocessor.rb
125
+ - lib/tiered_category_expressions/tail.rb
125
126
  - lib/tiered_category_expressions/tiers.rb
126
127
  - lib/tiered_category_expressions/transformer.rb
127
128
  - lib/tiered_category_expressions/util.rb