rs_path_tokenizer 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.editorconfig +25 -0
- data/.gitignore +18 -0
- data/.rspec +2 -0
- data/.ruby-gemset +1 -0
- data/.ruby-version +1 -0
- data/.travis.yml +3 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +62 -0
- data/Rakefile +2 -0
- data/bin/console +14 -0
- data/bin/setup +7 -0
- data/lib/rs_path_tokenizer.rb +3 -0
- data/lib/rs_path_tokenizer/error.rb +3 -0
- data/lib/rs_path_tokenizer/tokenizer.rb +164 -0
- data/lib/rs_path_tokenizer/version.rb +3 -0
- data/spec/rs_path_tokenizer_spec.rb +62 -0
- data/spec/spec_helper.rb +2 -0
- metadata +109 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 6655c43ba457ceabc403de89daa4c33f6371a20b
|
4
|
+
data.tar.gz: 9815bcc5540f0167921f6bbf6409ff5b67e26b27
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: e1ce0bf52544242abd6f82d075569ee55929d5c8eb78fc877221dd43c46c3f42ed6729438fb4dd06ab1b265bbd941adf72217786c2b3c89d0bef5f74e06eb403
|
7
|
+
data.tar.gz: 57d05b13324b25963689f1e30ac913f1b6c089f13f816424b5865421bfae21ebd4cba7e333e909398c85377907f21fa52ae2aa413275d95bc933c8a96f10d78f
|
data/.editorconfig
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
root = true
|
2
|
+
|
3
|
+
[*]
|
4
|
+
end_of_line = lf
|
5
|
+
insert_final_newline = true
|
6
|
+
trim_trailing_whitespace = true
|
7
|
+
tab_width = 2
|
8
|
+
indent_style = space
|
9
|
+
indent_size = 2
|
10
|
+
|
11
|
+
[**.bat]
|
12
|
+
end_of_line = crlf
|
13
|
+
|
14
|
+
[**.min.*]
|
15
|
+
indent_style = ignore
|
16
|
+
trim_trailing_whitespace = false
|
17
|
+
insert_final_newline = ignore
|
18
|
+
|
19
|
+
[*.slim]
|
20
|
+
insert_final_newline = false
|
21
|
+
trim_trailing_whitespace = false
|
22
|
+
|
23
|
+
[*.txt]
|
24
|
+
insert_final_newline = false
|
25
|
+
trim_trailing_whitespace = false
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.ruby-gemset
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
rs_path_tokenizer
|
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.2.1
|
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2015 Sergey Malykh
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
# RsPathTokenizer
|
2
|
+
|
3
|
+
RsPathTokenizer finds predefined parts (tokens) in a specified URL path.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
```ruby
|
10
|
+
gem 'rs_path_tokenizer'
|
11
|
+
```
|
12
|
+
|
13
|
+
And then execute:
|
14
|
+
|
15
|
+
$ bundle
|
16
|
+
|
17
|
+
Or install it yourself as:
|
18
|
+
|
19
|
+
$ gem install rs_path_tokenizer
|
20
|
+
|
21
|
+
## Usage
|
22
|
+
|
23
|
+
```ruby
|
24
|
+
# define tokens data
|
25
|
+
# hash key - token's URL code
|
26
|
+
# hash value - returned property & value (e.g. for an SQL query)
|
27
|
+
tokens_data = {
|
28
|
+
'balashiha' => ['region', 'balashiha'],
|
29
|
+
'balashiha-gorodskoj-okrug' => ['region', 'balashiha-gorodskoj-okrug'],
|
30
|
+
'gorodskoj-okrug-drugoi' => ['region', 'gorodskoj-okrug-drugoi'],
|
31
|
+
# price from
|
32
|
+
'price-*' => ['price', nil],
|
33
|
+
# price from any to any (including from 0 to any)
|
34
|
+
'price-*-*' => ['price', nil],
|
35
|
+
'expensive' => ['sort', 'expensive']
|
36
|
+
}
|
37
|
+
|
38
|
+
tokenizer = RsPathTokenizer::Tokenizer.new(tokens_data)
|
39
|
+
|
40
|
+
# search tokens in specified URL
|
41
|
+
found_tokens = tokenizer.tokenize('balashiha-gorodskoj-okrug-drugoi-price-100-expensive')
|
42
|
+
|
43
|
+
# {"balashiha"=>["region", "balashiha"],
|
44
|
+
# "gorodskoj-okrug-drugoi"=>["region", "gorodskoj-okrug-drugoi"],
|
45
|
+
# "price-*"=>["price", "100"],
|
46
|
+
# "expensive"=>["sort", "expensive"]}
|
47
|
+
|
48
|
+
```
|
49
|
+
|
50
|
+
## Development
|
51
|
+
|
52
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `bin/console` for an interactive prompt that will allow you to experiment.
|
53
|
+
|
54
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release` to create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
55
|
+
|
56
|
+
## Contributing
|
57
|
+
|
58
|
+
1. Fork it ( https://github.com/[my-github-username]/rs_path_tokenizer/fork )
|
59
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
60
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
61
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
62
|
+
5. Create a new Pull Request
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "rs_path_tokenizer"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require "irb"
|
14
|
+
IRB.start
|
data/bin/setup
ADDED
@@ -0,0 +1,164 @@
|
|
1
|
+
module RsPathTokenizer
  # Finds predefined tokens inside a hyphen-separated URL path.
  #
  # A token definition is a Hash whose keys are hyphen-separated token codes
  # (for example 'gorodskoj-okrug-drugoi' or 'price-*') and whose values are
  # arbitrary payloads returned to the caller. A '*' segment in a key matches
  # any single URL segment; a key may not start with '*'.
  class Tokenizer
    PT_DEBUG = false
    # PT_DEBUG = true

    # Builds the lookup index for the given token definitions.
    #
    # @param tokens [Hash{String => Object}, nil] token code => payload;
    #   nil behaves like an empty definition set
    # @raise [Error] when a token code starts with '*'
    def initialize(tokens = nil)
      # Index: first segment of a token code => list of segment arrays for
      # every token code starting with that segment.
      @single_tokens = {}
      @token_map = tokens || {}

      @token_map.keys.each do |t|
        parts = url2token(t)
        st = parts[0]
        raise Error, 'Token cant starts with asterisk' if st == '*'
        (@single_tokens[st] ||= []).push parts
      end
    end

    # Marshal support: serializes the prebuilt index together with the
    # token definitions.
    def marshal_dump
      [@single_tokens, @token_map]
    end

    # Marshal support: restores the state produced by #marshal_dump.
    def marshal_load(array)
      @single_tokens, @token_map = array
    end

    # Returns the best tokenization of +string+.
    #
    # @param string [String] hyphen-separated URL path
    # @return [Hash, nil] matched token code => payload (for wildcard tokens
    #   the payload is the array of matched URL segments), or nil when no
    #   token was found
    # @raise [Error] when the URL has more than 500 segments or the parse
    #   nests too deeply
    def tokenize(string)
      tokens = tokenize_all(string).first
      return if tokens.nil?

      result_to_hash(tokens)
    end

    protected

    # Returns every candidate tokenization of +string+, best first.
    def tokenize_all(string)
      array = url2token(string)
      raise Error, 'Too long URL' if array.length > 500

      # Only token groups whose first segment occurs in the URL can match.
      possible_tokens = @single_tokens.select { |st, _| array.include?(st) }

      # Work on a shallow copy: wildcard captures are written into this map,
      # and writing into @token_map itself would overwrite the caller's
      # token definitions (and fail outright on a frozen Hash).
      @out_token_map = @token_map.dup
      sort_results(recursive_parse(array, possible_tokens))
    end

    # Orders candidates so that the one covering the most URL segments comes
    # first; ties are broken in favour of the candidate with more tokens.
    def sort_results(results)
      results.sort do |a, b|
        result = b.flatten.length <=> a.flatten.length
        result = b.length <=> a.length if result == 0
        puts "sorting: #{a.inspect} #{b.inspect} #{result}" if PT_DEBUG
        result
      end
    end

    # Converts a candidate (array of token segment arrays) into
    # token code => payload, reading payloads from the per-call output map.
    def result_to_hash(array)
      Hash[array.map do |e|
        k = token2url(e)
        [k, @out_token_map[k]]
      end]
    end

    # Walks the URL segments left to right and collects every combination of
    # tokens that can be matched.
    #
    # @param array [Array<String>] remaining URL segments
    # @param possible_tokens [Hash] first segment => candidate token arrays
    # @param limiter [Integer] current nesting depth, used as a recursion guard
    # @return [Array<Array<Array<String>>>] candidate tokenizations
    # @raise [Error] when nesting exceeds 30 levels
    def recursive_parse(array, possible_tokens, limiter = 1)
      raise Error, 'Too deep recursion' if limiter > 30

      st = array.first
      # An exhausted URL or a blank segment ends the parse.
      return [] if st.to_s.strip == ''

      tokens = possible_tokens[st]
      if tokens.nil?
        puts "#{" " * limiter}NO tokens for #{st}" if PT_DEBUG
        # NOTE(review): skipping an unknown segment restarts the depth
        # counter at its default; kept as-is to preserve existing behaviour.
        return recursive_parse(array.drop(1), possible_tokens)
      end

      results = []
      puts "#{" " * limiter}possible tokens for #{st} are: #{tokens.inspect}" if PT_DEBUG

      tokens.each do |token|
        found, out, rest = try_match(token, array)
        puts "#{" " * limiter}matching #{token.inspect}" if PT_DEBUG

        if found
          # A wildcard token matched: remember the concrete URL segments.
          @out_token_map[token2url(token)] = out if out != token

          puts "#{" " * limiter}found a token: #{token.inspect}, parsing rest: #{rest.inspect}" if PT_DEBUG
          more = recursive_parse(rest.dup, possible_tokens, limiter + 1)
          results = merge_results(results, token, more)
        else
          puts "#{" " * limiter}found none on this level, NOT parsing rest: #{rest.inspect}" if PT_DEBUG
          more = recursive_parse(array.drop(1), possible_tokens, limiter + 1)
          results = merge_results(results, nil, more)
        end
      end

      if PT_DEBUG
        puts "#{" " * limiter}results:"
        results.each do |r|
          puts "#{" " * limiter} #{r.inspect}"
        end
      end

      results
    end

    # Appends to +results+ the candidates formed by prefixing +found+ (when
    # present) to each candidate in +other+, then removes duplicates.
    def merge_results(results, found, other)
      if other.empty?
        results.push [found] unless found.nil?
      elsif found.nil?
        other.each { |o| results.push o }
      else
        other.each { |o| results.push [found] + o }
      end
      results.map(&:uniq).uniq
    end

    # Tries to match +token+ against the beginning of +array+.
    #
    # @return [Array] [true, matched URL segments, remaining segments] on
    #   success, otherwise [false, partially matched segments, array]
    def try_match(token, array)
      found, out = [], []
      rest = array.dup

      token.each do |token_part|
        url_part = rest.shift
        break if url_part.nil?

        if token_part == '*'
          out.push url_part
          found.push token_part
        elsif token_part == url_part
          found.push token_part
          out.push token_part
        end
      end

      if found == token
        [true, out, rest]
      else
        [false, out, array]
      end
    end

    # Joins token segments back into a hyphen-separated token code.
    def token2url(token)
      token.join('-')
    end

    # Splits a hyphen-separated string into its segments.
    def url2token(url)
      url.split("-")
    end
  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe RsPathTokenizer do
  # Baseline token definitions shared by every example; individual examples
  # may add entries before the tokenizer is first used.
  let(:tokens_data) do
    {
      'balashiha' => ['r', 'balashiha'],
      'gorodskoj-okrug-balashiha-1' => ['r', 'balashiha-1'],
      'gorodskoj-okrug-balashiha-11' => ['r', 'balashiha-11'],
      'balashiha-gorodskoj-okrug' => ['r', 'balashiha-gorodskoj-okrug'],
      'gorodskoj-okrug-drugoi' => ['r', 'gorodskoj-okrug-drugoi'],
      'expensive' => ['sort', 'expensive']
    }
  end

  # Built lazily, so definitions added inside an example are picked up.
  let(:tokenizer) { RsPathTokenizer::Tokenizer.new(tokens_data) }

  it 'shows best results when tokens overlaps' do
    expected = {
      "balashiha" => ["r", "balashiha"],
      "gorodskoj-okrug-drugoi" => ["r", "gorodskoj-okrug-drugoi"]
    }

    expect(tokenizer.tokenize('balashiha-gorodskoj-okrug-drugoi')).to eq expected
  end

  it 'shows matched result' do
    expect(tokenizer.tokenize('gorodskoj-okrug-balashiha-1'))
      .to eq("gorodskoj-okrug-balashiha-1" => ["r", "balashiha-1"])

    expect(tokenizer.tokenize('gorodskoj-okrug-balashiha-11'))
      .to eq("gorodskoj-okrug-balashiha-11" => ["r", "balashiha-11"])
  end

  it 'returns price range' do
    tokens_data['price-*-*'] = ["price", nil]
    tokens_data['price-*'] = ["price", nil]

    region = { "balashiha" => ["r", "balashiha"] }
    sorting = { "expensive" => ["sort", "expensive"] }
    range = { 'price-*-*' => ["price", "0", "100"] }
    from = { 'price-*' => ["price", "100"] }

    expect(tokenizer.tokenize('balashiha-price-0-100')).to eq region.merge(range)
    expect(tokenizer.tokenize('balashiha-price-100')).to eq region.merge(from)
    expect(tokenizer.tokenize('balashiha-price-0-100-expensive')).to eq region.merge(range).merge(sorting)
    expect(tokenizer.tokenize('balashiha-price-100-expensive')).to eq region.merge(from).merge(sorting)
  end

  it 'nothing found' do
    expect(tokenizer.tokenize('incorrect-url')).to eq nil
  end

  it 'too long url' do
    expect { tokenizer.tokenize('a-b' * 501) }.to raise_error RsPathTokenizer::Error
  end

  it 'incorrect token' do
    wildcard_only = { '*' => ["all", true] }
    expect { RsPathTokenizer::Tokenizer.new(wildcard_only) }.to raise_error RsPathTokenizer::Error
  end
end
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,109 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: rs_path_tokenizer
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- glebtv
|
8
|
+
- Sergey Malykh
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2015-03-17 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: bundler
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
requirements:
|
18
|
+
- - ">="
|
19
|
+
- !ruby/object:Gem::Version
|
20
|
+
version: '0'
|
21
|
+
type: :development
|
22
|
+
prerelease: false
|
23
|
+
version_requirements: !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - ">="
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
version: '0'
|
28
|
+
- !ruby/object:Gem::Dependency
|
29
|
+
name: rake
|
30
|
+
requirement: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - ">="
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '0'
|
35
|
+
type: :development
|
36
|
+
prerelease: false
|
37
|
+
version_requirements: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - ">="
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: '0'
|
42
|
+
- !ruby/object:Gem::Dependency
|
43
|
+
name: rspec
|
44
|
+
requirement: !ruby/object:Gem::Requirement
|
45
|
+
requirements:
|
46
|
+
- - ">="
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: '0'
|
49
|
+
type: :development
|
50
|
+
prerelease: false
|
51
|
+
version_requirements: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - ">="
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '0'
|
56
|
+
description: Tokenize path from predefined tokens.
|
57
|
+
email:
|
58
|
+
- xronos.i.am@gmail.com
|
59
|
+
executables:
|
60
|
+
- console
|
61
|
+
- setup
|
62
|
+
extensions: []
|
63
|
+
extra_rdoc_files: []
|
64
|
+
files:
|
65
|
+
- ".editorconfig"
|
66
|
+
- ".gitignore"
|
67
|
+
- ".rspec"
|
68
|
+
- ".ruby-gemset"
|
69
|
+
- ".ruby-version"
|
70
|
+
- ".travis.yml"
|
71
|
+
- Gemfile
|
72
|
+
- LICENSE.txt
|
73
|
+
- README.md
|
74
|
+
- Rakefile
|
75
|
+
- bin/console
|
76
|
+
- bin/setup
|
77
|
+
- lib/rs_path_tokenizer.rb
|
78
|
+
- lib/rs_path_tokenizer/error.rb
|
79
|
+
- lib/rs_path_tokenizer/tokenizer.rb
|
80
|
+
- lib/rs_path_tokenizer/version.rb
|
81
|
+
- spec/rs_path_tokenizer_spec.rb
|
82
|
+
- spec/spec_helper.rb
|
83
|
+
homepage: http://github.com/xronos-i-am/rs_path_tokenizer
|
84
|
+
licenses:
|
85
|
+
- MIT
|
86
|
+
metadata: {}
|
87
|
+
post_install_message:
|
88
|
+
rdoc_options: []
|
89
|
+
require_paths:
|
90
|
+
- lib
|
91
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
92
|
+
requirements:
|
93
|
+
- - ">="
|
94
|
+
- !ruby/object:Gem::Version
|
95
|
+
version: '0'
|
96
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
97
|
+
requirements:
|
98
|
+
- - ">="
|
99
|
+
- !ruby/object:Gem::Version
|
100
|
+
version: '0'
|
101
|
+
requirements: []
|
102
|
+
rubyforge_project:
|
103
|
+
rubygems_version: 2.4.6
|
104
|
+
signing_key:
|
105
|
+
specification_version: 4
|
106
|
+
summary: URL path tokenizer.
|
107
|
+
test_files:
|
108
|
+
- spec/rs_path_tokenizer_spec.rb
|
109
|
+
- spec/spec_helper.rb
|