rs_path_tokenizer 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 6655c43ba457ceabc403de89daa4c33f6371a20b
4
+ data.tar.gz: 9815bcc5540f0167921f6bbf6409ff5b67e26b27
5
+ SHA512:
6
+ metadata.gz: e1ce0bf52544242abd6f82d075569ee55929d5c8eb78fc877221dd43c46c3f42ed6729438fb4dd06ab1b265bbd941adf72217786c2b3c89d0bef5f74e06eb403
7
+ data.tar.gz: 57d05b13324b25963689f1e30ac913f1b6c089f13f816424b5865421bfae21ebd4cba7e333e909398c85377907f21fa52ae2aa413275d95bc933c8a96f10d78f
@@ -0,0 +1,25 @@
1
+ root = true
2
+
3
+ [*]
4
+ end_of_line = lf
5
+ insert_final_newline = true
6
+ trim_trailing_whitespace = true
7
+ tab_width = 2
8
+ indent_style = space
9
+ indent_size = 2
10
+
11
+ [**.bat]
12
+ end_of_line = crlf
13
+
14
+ [**.min.*]
15
+ indent_style = ignore
16
+ trim_trailing_whitespace = false
17
+ insert_final_newline = ignore
18
+
19
+ [*.slim]
20
+ insert_final_newline = false
21
+ trim_trailing_whitespace = false
22
+
23
+ [*.txt]
24
+ insert_final_newline = false
25
+ trim_trailing_whitespace = false
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ .idea
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
@@ -0,0 +1 @@
1
+ rs_path_tokenizer
@@ -0,0 +1 @@
1
+ 2.2.1
@@ -0,0 +1,3 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.2.1
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in rs_path_tokenizer.gemspec
4
+ gemspec
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2015 Sergey Malykh
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,62 @@
1
+ # RsPathTokenizer
2
+
3
+ RsPathTokenizer finds predefined parts (tokens) in a specified URL.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'rs_path_tokenizer'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install rs_path_tokenizer
20
+
21
+ ## Usage
22
+
23
+ ```ruby
24
+ # define tokens data
25
+ # hash key - token's URL code
26
+ # hash value - returned property & value (e.g. for an SQL query)
27
+ tokens_data = {
28
+ 'balashiha' => ['region', 'balashiha'],
29
+ 'balashiha-gorodskoj-okrug' => ['region', 'balashiha-gorodskoj-okrug'],
30
+ 'gorodskoj-okrug-drugoi' => ['region', 'gorodskoj-okrug-drugoi'],
31
+ # price from
32
+ 'price-*' => ['price', nil],
33
+ # price from any to any (including from 0 to any)
34
+ 'price-*-*' => ['price', nil],
35
+ 'expensive' => ['sort', 'expensive']
36
+ }
37
+
38
+ tokenizer = RsPathTokenizer::Tokenizer.new(tokens_data)
39
+
40
+ # search tokens in specified URL
41
+ found_tokens = tokenizer.tokenize('balashiha-gorodskoj-okrug-drugoi-price-100-expensive')
42
+
43
+ # {"balashiha"=>["region", "balashiha"],
44
+ # "gorodskoj-okrug-drugoi"=>["region", "gorodskoj-okrug-drugoi"],
45
+ # "price-*"=>["price", "100"],
46
+ # "expensive"=>["sort", "expensive"]}
47
+
48
+ ```
49
+
50
+ ## Development
51
+
52
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `bin/console` for an interactive prompt that will allow you to experiment.
53
+
54
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release` to create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
55
+
56
+ ## Contributing
57
+
58
+ 1. Fork it ( https://github.com/xronos-i-am/rs_path_tokenizer/fork )
59
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
60
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
61
+ 4. Push to the branch (`git push origin my-new-feature`)
62
+ 5. Create a new Pull Request
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "rs_path_tokenizer"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start
@@ -0,0 +1,7 @@
1
+ #!/bin/bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+
5
+ bundle install
6
+
7
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,3 @@
1
+ require "rs_path_tokenizer/version"
2
+ require "rs_path_tokenizer/error"
3
+ require "rs_path_tokenizer/tokenizer"
@@ -0,0 +1,3 @@
1
+ module RsPathTokenizer
2
+ class Error < RuntimeError; end
3
+ end
@@ -0,0 +1,164 @@
1
module RsPathTokenizer
  # Finds predefined tokens inside a dash-separated URL string.
  #
  # Tokens are supplied as a Hash whose keys are dash-separated token codes
  # (for example 'price-*'). A '*' part matches any single URL part. For every
  # token found, the result contains the value configured for that token,
  # except that a token matched through a wildcard yields the URL parts it
  # matched (e.g. 'price-*' against 'price-100' yields ['price', '100']).
  class Tokenizer
    PT_DEBUG = false
    # PT_DEBUG = true

    # @param tokens [Hash{String => Object}, nil] token code => value to
    #   return when the token is found. With nil the tokenizer is empty and
    #   #tokenize always returns nil.
    # @raise [Error] if the first part of a token code is '*'
    def initialize(tokens = nil)
      # Index the tokens by their first part so that parsing only has to try
      # the tokens that can start at the current URL part.
      @single_tokens = {}
      @token_map = tokens.nil? ? {} : tokens
      @token_map.each_key do |code|
        parts = url2token(code)
        head = parts[0]
        raise Error, 'Token cant starts with asterisk' if head == '*'
        (@single_tokens[head] ||= []).push(parts)
      end
    end

    # Serializes only the prepared lookup tables.
    def marshal_dump
      [@single_tokens, @token_map]
    end

    # Restores the lookup tables written by #marshal_dump.
    def marshal_load(array)
      @single_tokens, @token_map = array
    end

    # Returns the best tokenization of +string+.
    #
    # @param string [String] dash-separated URL
    # @return [Hash, nil] token code => value for the best match, or nil when
    #   no token was found
    # @raise [Error] if the URL has more than 500 parts or parsing nests
    #   deeper than 30 levels
    def tokenize(string)
      best = tokenize_all(string).first
      return if best.nil?

      result_to_hash(best)
    end

    protected

    # Returns every tokenization found for +string+, best first (see
    # #sort_results). Each tokenization is an array of tokens, each token an
    # array of its parts.
    def tokenize_all(string)
      array = url2token(string)
      raise Error, 'Too long URL' if array.length > 500

      # Only tokens whose first part occurs somewhere in the URL can match.
      possible_tokens = @single_tokens.select { |head, _| array.include?(head) }

      # Work on a per-call copy: wildcard matches are written into this map,
      # and writing them into @token_map would overwrite the configured values
      # (and the caller's hash) and leak matches from one call into the next.
      @out_token_map = @token_map.dup
      sort_results(recursive_parse(array, possible_tokens))
    end

    # Orders tokenizations so that the one covering the most token parts comes
    # first; ties are broken in favour of the one consisting of more tokens.
    def sort_results(results)
      results.sort do |a, b|
        result = b.flatten.length <=> a.flatten.length
        result = b.length <=> a.length if result == 0
        trace { "sorting: #{a.inspect} #{b.inspect} #{result}" }
        result
      end
    end

    # Converts one tokenization into a Hash of token code => value, reading
    # the values from the map prepared by the current #tokenize_all call.
    def result_to_hash(array)
      Hash[array.map do |token|
        code = token2url(token)
        [code, @out_token_map[code]]
      end]
    end

    # Collects all tokenizations of +array+ (the remaining URL parts).
    #
    # @param array [Array<String>] remaining URL parts
    # @param possible_tokens [Hash{String => Array<Array<String>>}] candidate
    #   tokens keyed by their first part
    # @param limiter [Integer] current nesting depth
    # @raise [Error] when +limiter+ exceeds 30
    def recursive_parse(array, possible_tokens, limiter = 1)
      raise Error, 'Too deep recursion' if limiter > 30

      st = array.first
      # Parsing stops at the end of the URL or at the first blank part.
      return [] if st.to_s.strip == ''

      tokens = possible_tokens[st]
      if tokens.nil?
        trace(limiter) { "NO tokens for #{st}" }
        # The depth counter is not forwarded here, so skipping URL parts that
        # start no token does not count towards the depth limit.
        return recursive_parse(array.drop(1), possible_tokens)
      end

      results = []
      trace(limiter) { "possible tokens for #{st} are: #{tokens.inspect}" }

      tokens.each do |token|
        found, out, rest = try_match(token, array)
        trace(limiter) { "matching #{token.inspect}" }

        if found
          # +out+ differs from +token+ when a wildcard consumed a URL part;
          # in that case the matched parts become the returned value.
          @out_token_map[token2url(token)] = out if out != token

          trace(limiter) { "found a token: #{token.inspect}, parsing rest: #{rest.inspect}" }
          more = recursive_parse(rest.dup, possible_tokens, limiter + 1)
          results = merge_results(results, token, more)
        else
          trace(limiter) { "found none on this level, NOT parsing rest: #{rest.inspect}" }
          more = recursive_parse(array.drop(1), possible_tokens, limiter + 1)
          results = merge_results(results, nil, more)
        end
      end

      if PT_DEBUG
        trace(limiter) { 'results:' }
        results.each { |r| trace(limiter) { " #{r.inspect}" } }
      end

      results
    end

    # Adds to +results+ the tokenizations formed by +found+ (a token or nil)
    # followed by each tokenization in +other+, then removes duplicates both
    # inside each tokenization and across tokenizations.
    def merge_results(results, found, other)
      if other.empty?
        results.push([found]) unless found.nil?
      elsif found.nil?
        results.concat(other)
      else
        other.each { |tail| results.push([found] + tail) }
      end
      results.map(&:uniq).uniq
    end

    # Tries to match +token+ against the beginning of +array+.
    #
    # @return [Array] [true, matched URL parts, remaining URL parts] on
    #   success, otherwise [false, partially matched parts, array]
    def try_match(token, array)
      found = []
      out = []
      rest = array.dup

      token.each do |token_part|
        url_part = rest.shift
        break if url_part.nil?

        if token_part == '*'
          out.push url_part
          found.push token_part
        elsif token_part == url_part
          found.push token_part
          out.push token_part
        end
      end

      # A mismatching part is simply not recorded, so the token matched only
      # if every one of its parts was recorded.
      found == token ? [true, out, rest] : [false, out, array]
    end

    # Joins token parts back into a token code.
    def token2url(token)
      token.join('-')
    end

    # Splits a URL or token code into its dash-separated parts.
    def url2token(url)
      url.split("-")
    end

    # Prints the debug message produced by the block, indented by +depth+
    # spaces, when PT_DEBUG is enabled. The block is not evaluated otherwise.
    def trace(depth = 0)
      puts "#{' ' * depth}#{yield}" if PT_DEBUG
    end
  end
end
@@ -0,0 +1,3 @@
1
+ module RsPathTokenizer
2
+ VERSION = "0.2.0"
3
+ end
@@ -0,0 +1,62 @@
1
+ require 'spec_helper'
2
+
3
+ describe RsPathTokenizer do
4
+ before :each do
5
+ @tokens_data = {
6
+ 'balashiha' => ['r', 'balashiha'],
7
+ 'gorodskoj-okrug-balashiha-1' => ['r', 'balashiha-1'],
8
+ 'gorodskoj-okrug-balashiha-11' => ['r', 'balashiha-11'],
9
+ 'balashiha-gorodskoj-okrug' => ['r', 'balashiha-gorodskoj-okrug'],
10
+ 'gorodskoj-okrug-drugoi' => ['r', 'gorodskoj-okrug-drugoi'],
11
+ 'expensive' => ['sort', 'expensive'],
12
+ }
13
+
14
+ @tokenizer = RsPathTokenizer::Tokenizer.new( @tokens_data )
15
+ end
16
+
17
+ it 'shows best results when tokens overlaps' do
18
+ results = {"balashiha" => ["r", "balashiha"], "gorodskoj-okrug-drugoi" => ["r", "gorodskoj-okrug-drugoi"]}
19
+
20
+ expect( @tokenizer.tokenize( 'balashiha-gorodskoj-okrug-drugoi' ) ).to eq results
21
+ end
22
+
23
+ it 'shows matched result' do
24
+ results = {"gorodskoj-okrug-balashiha-1" => ["r", "balashiha-1"]}
25
+ expect( @tokenizer.tokenize( 'gorodskoj-okrug-balashiha-1' ) ).to eq results
26
+
27
+ results = {"gorodskoj-okrug-balashiha-11" => ["r", "balashiha-11"]}
28
+ expect( @tokenizer.tokenize( 'gorodskoj-okrug-balashiha-11' ) ).to eq results
29
+ end
30
+
31
+ it 'returns price range' do
32
+ @tokens_data['price-*-*'] = ["price", nil]
33
+ @tokens_data['price-*'] = ["price", nil]
34
+
35
+ @tokenizer = RsPathTokenizer::Tokenizer.new( @tokens_data )
36
+
37
+ results = {"balashiha"=>["r", "balashiha"], 'price-*-*' => ["price", "0", "100"]}
38
+ expect( @tokenizer.tokenize( 'balashiha-price-0-100' ) ).to eq results
39
+
40
+ results = {"balashiha"=>["r", "balashiha"], 'price-*' => ["price", "100"]}
41
+ expect( @tokenizer.tokenize( 'balashiha-price-100' ) ).to eq results
42
+
43
+ results = {"balashiha"=>["r", "balashiha"], "price-*-*"=>["price", "0", "100"], "expensive"=>["sort", "expensive"]}
44
+ expect( @tokenizer.tokenize( 'balashiha-price-0-100-expensive' ) ).to eq results
45
+
46
+ results = {"balashiha"=>["r", "balashiha"], "price-*"=>["price", "100"], "expensive"=>["sort", "expensive"]}
47
+ expect( @tokenizer.tokenize( 'balashiha-price-100-expensive' ) ).to eq results
48
+ end
49
+
50
+ it 'nothing found' do
51
+ expect( @tokenizer.tokenize( 'incorrect-url' ) ).to eq nil
52
+ end
53
+
54
+ it 'too long url' do
55
+ expect{ @tokenizer.tokenize( 'a-b' * 501 ) }.to raise_error RsPathTokenizer::Error
56
+ end
57
+
58
+ it 'incorrect token' do
59
+ tokens_data = {'*' => ["all", true]}
60
+ expect{ RsPathTokenizer::Tokenizer.new( tokens_data ) }.to raise_error RsPathTokenizer::Error
61
+ end
62
+ end
@@ -0,0 +1,2 @@
1
+ $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
2
+ require 'rs_path_tokenizer'
metadata ADDED
@@ -0,0 +1,109 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rs_path_tokenizer
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.0
5
+ platform: ruby
6
+ authors:
7
+ - glebtv
8
+ - Sergey Malykh
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2015-03-17 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: bundler
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - ">="
19
+ - !ruby/object:Gem::Version
20
+ version: '0'
21
+ type: :development
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ version: '0'
28
+ - !ruby/object:Gem::Dependency
29
+ name: rake
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: '0'
35
+ type: :development
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ version: '0'
42
+ - !ruby/object:Gem::Dependency
43
+ name: rspec
44
+ requirement: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ version: '0'
49
+ type: :development
50
+ prerelease: false
51
+ version_requirements: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: '0'
56
+ description: Tokenize path from predefined tokens.
57
+ email:
58
+ - xronos.i.am@gmail.com
59
+ executables:
60
+ - console
61
+ - setup
62
+ extensions: []
63
+ extra_rdoc_files: []
64
+ files:
65
+ - ".editorconfig"
66
+ - ".gitignore"
67
+ - ".rspec"
68
+ - ".ruby-gemset"
69
+ - ".ruby-version"
70
+ - ".travis.yml"
71
+ - Gemfile
72
+ - LICENSE.txt
73
+ - README.md
74
+ - Rakefile
75
+ - bin/console
76
+ - bin/setup
77
+ - lib/rs_path_tokenizer.rb
78
+ - lib/rs_path_tokenizer/error.rb
79
+ - lib/rs_path_tokenizer/tokenizer.rb
80
+ - lib/rs_path_tokenizer/version.rb
81
+ - spec/rs_path_tokenizer_spec.rb
82
+ - spec/spec_helper.rb
83
+ homepage: http://github.com/xronos-i-am/rs_path_tokenizer
84
+ licenses:
85
+ - MIT
86
+ metadata: {}
87
+ post_install_message:
88
+ rdoc_options: []
89
+ require_paths:
90
+ - lib
91
+ required_ruby_version: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - ">="
94
+ - !ruby/object:Gem::Version
95
+ version: '0'
96
+ required_rubygems_version: !ruby/object:Gem::Requirement
97
+ requirements:
98
+ - - ">="
99
+ - !ruby/object:Gem::Version
100
+ version: '0'
101
+ requirements: []
102
+ rubyforge_project:
103
+ rubygems_version: 2.4.6
104
+ signing_key:
105
+ specification_version: 4
106
+ summary: URL path tokenizer.
107
+ test_files:
108
+ - spec/rs_path_tokenizer_spec.rb
109
+ - spec/spec_helper.rb