rs_path_tokenizer 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 6655c43ba457ceabc403de89daa4c33f6371a20b
4
+ data.tar.gz: 9815bcc5540f0167921f6bbf6409ff5b67e26b27
5
+ SHA512:
6
+ metadata.gz: e1ce0bf52544242abd6f82d075569ee55929d5c8eb78fc877221dd43c46c3f42ed6729438fb4dd06ab1b265bbd941adf72217786c2b3c89d0bef5f74e06eb403
7
+ data.tar.gz: 57d05b13324b25963689f1e30ac913f1b6c089f13f816424b5865421bfae21ebd4cba7e333e909398c85377907f21fa52ae2aa413275d95bc933c8a96f10d78f
@@ -0,0 +1,25 @@
1
+ root = true
2
+
3
+ [*]
4
+ end_of_line = lf
5
+ insert_final_newline = true
6
+ trim_trailing_whitespace = true
7
+ tab_width = 2
8
+ indent_style = space
9
+ indent_size = 2
10
+
11
+ [**.bat]
12
+ end_of_line = crlf
13
+
14
+ [**.min.*]
15
+ indent_style = ignore
16
+ trim_trailing_whitespace = false
17
+ insert_final_newline = ignore
18
+
19
+ [*.slim]
20
+ insert_final_newline = false
21
+ trim_trailing_whitespace = false
22
+
23
+ [*.txt]
24
+ insert_final_newline = false
25
+ trim_trailing_whitespace = false
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ .idea
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
@@ -0,0 +1 @@
1
+ rs_path_tokenizer
@@ -0,0 +1 @@
1
+ 2.2.1
@@ -0,0 +1,3 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.2.1
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in rs_path_tokenizer.gemspec
4
+ gemspec
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2015 Sergey Malykh
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,62 @@
1
+ # RsPathTokenizer
2
+
3
+ PathTokenizer finds predefined parts (tokens) in a specified URL.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'rs_path_tokenizer'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install rs_path_tokenizer
20
+
21
+ ## Usage
22
+
23
+ ```ruby
24
+ # define tokens data
25
+ # hash key - token's URL code
26
+ # hash value - returned property & value (ie for SQL query)
27
+ tokens_data = {
28
+ 'balashiha' => ['region', 'balashiha'],
29
+ 'balashiha-gorodskoj-okrug' => ['region', 'balashiha-gorodskoj-okrug'],
30
+ 'gorodskoj-okrug-drugoi' => ['region', 'gorodskoj-okrug-drugoi'],
31
+ # price from
32
+ 'price-*' => ['price', nil],
33
+ # price from any to any (including from 0 to any)
34
+ 'price-*-*' => ['price', nil],
35
+ 'expensive' => ['sort', 'expensive']
36
+ }
37
+
38
+ tokenizer = RsPathTokenizer::Tokenizer.new(tokens_data)
39
+
40
+ # search tokens in specified URL
41
+ found_tokens = tokenizer.tokenize('balashiha-gorodskoj-okrug-drugoi-price-100-expensive')
42
+
43
+ # {"balashiha"=>["region", "balashiha"],
44
+ # "gorodskoj-okrug-drugoi"=>["region", "gorodskoj-okrug-drugoi"],
45
+ # "price-*"=>["price", "100"],
46
+ # "expensive"=>["sort", "expensive"]}
47
+
48
+ ```
49
+
50
+ ## Development
51
+
52
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `bin/console` for an interactive prompt that will allow you to experiment.
53
+
54
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release` to create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
55
+
56
+ ## Contributing
57
+
58
+ 1. Fork it ( https://github.com/xronos-i-am/rs_path_tokenizer/fork )
59
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
60
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
61
+ 4. Push to the branch (`git push origin my-new-feature`)
62
+ 5. Create a new Pull Request
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "rs_path_tokenizer"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start
@@ -0,0 +1,7 @@
1
+ #!/bin/bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+
5
+ bundle install
6
+
7
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,3 @@
1
+ require "rs_path_tokenizer/version"
2
+ require "rs_path_tokenizer/error"
3
+ require "rs_path_tokenizer/tokenizer"
@@ -0,0 +1,3 @@
1
module RsPathTokenizer
  # Error type raised by the tokenizer, e.g. for a token definition that
  # starts with an asterisk or for a URL with too many parts.
  class Error < RuntimeError
  end
end
@@ -0,0 +1,164 @@
1
module RsPathTokenizer
  # Finds predefined tokens inside a dash-separated URL path.
  #
  # Tokens are given as a hash whose keys are dash-separated token codes
  # (a part may be the wildcard '*', which matches any single URL part) and
  # whose values are returned for the tokens that were found. For a token
  # containing wildcards the returned value is the list of URL parts that
  # the token actually matched (e.g. 'price-*' on 'price-100' yields
  # ['price', '100']).
  class Tokenizer
    # Set to true to print a trace of the matching process to stdout.
    PT_DEBUG = false
    # PT_DEBUG = true

    # @param tokens [Hash{String => Object}, nil] token code => value map;
    #   nil builds a tokenizer that knows no tokens
    # @raise [Error] if a token code starts with the wildcard '*'
    def initialize(tokens = nil)
      # Always start from a usable (empty) state so that a tokenizer built
      # without tokens returns nil from #tokenize instead of crashing.
      @single_tokens = {}
      @token_map = {}
      return if tokens.nil?

      # Index every token (as an array of parts) by its first part.
      tokens.each_key do |code|
        parts = url2token(code)
        head = parts.first
        raise Error, 'Token cant starts with asterisk' if head == '*'
        (@single_tokens[head] ||= []).push(parts)
      end
      @token_map = tokens
    end

    # State used by Marshal: the first-part index and the token map.
    def marshal_dump
      [@single_tokens, @token_map]
    end

    # Restores the state produced by #marshal_dump. Missing (nil) entries
    # are replaced with empty hashes so the restored object stays usable.
    def marshal_load(array)
      @single_tokens, @token_map = array
      @single_tokens ||= {}
      @token_map ||= {}
    end

    # Returns the best tokenization of +string+.
    #
    # @param string [String] dash-separated URL path
    # @return [Hash, nil] token code => value for every token of the best
    #   result, or nil when no token was found
    # @raise [Error] if the URL has more than 500 parts or the recursion
    #   limit is exceeded
    def tokenize(string)
      best = tokenize_all(string).first
      return if best.nil?

      result_to_hash(best)
    end

    protected

    # Returns every tokenization found for +string+, best first
    # (see #sort_results for the ordering).
    def tokenize_all(string)
      array = url2token(string)
      raise Error, 'Too long URL' if array.length > 500

      # Only tokens whose first part occurs in the URL can possibly match.
      possible_tokens = @single_tokens.select { |head, _| array.include?(head) }

      # Work on a per-call copy: wildcard matches store their matched URL
      # parts in this map, and writing them into @token_map would silently
      # rewrite the caller's token definitions.
      @out_token_map = @token_map.dup
      sort_results(recursive_parse(array, possible_tokens))
    end

    # Orders results so that the one covering the most token parts comes
    # first; ties are broken in favour of the result with more tokens.
    def sort_results(results)
      results.sort do |a, b|
        result = b.flatten.length <=> a.flatten.length
        result = b.length <=> a.length if result == 0
        puts "sorting: #{a.inspect} #{b.inspect} #{result}" if PT_DEBUG
        result
      end
    end

    # Converts a result (array of tokens, each an array of parts) into a
    # hash of token code => value taken from the per-call token map.
    def result_to_hash(array)
      array.each_with_object({}) do |token, hash|
        code = token2url(token)
        hash[code] = @out_token_map[code]
      end
    end

    # Collects every token sequence that can be read from +array+.
    #
    # @param array [Array<String>] remaining URL parts
    # @param possible_tokens [Hash] first part => candidate tokens
    # @param limiter [Integer] current depth, guarded by a limit of 30
    # @return [Array<Array<Array<String>>>] list of results
    # @raise [Error] when +limiter+ exceeds 30
    def recursive_parse(array, possible_tokens, limiter = 1)
      raise Error, 'Too deep recursion' if limiter > 30

      st = array.first
      # Parsing stops at the end of the URL or at a blank part.
      return [] if st.to_s.strip == ''

      tokens = possible_tokens[st]
      if tokens.nil?
        puts "#{" " * limiter}NO tokens for #{st}" if PT_DEBUG
        # No token starts here: skip this part. The depth counter is not
        # forwarded (it restarts at its default), kept as-is for backward
        # compatibility.
        return recursive_parse(array.slice(1..-1), possible_tokens)
      end

      results = []
      puts "#{" " * limiter}possible tokens for #{st} are: #{tokens.inspect}" if PT_DEBUG

      tokens.each do |token|
        found, out, rest = try_match(token, array)
        puts "#{" " * limiter}matching #{token.inspect}" if PT_DEBUG

        if found
          # A wildcard matched: remember the concrete URL parts as the
          # value reported for this token.
          @out_token_map[token2url(token)] = out if out != token

          puts "#{" " * limiter}found a token: #{token.inspect}, parsing rest: #{rest.inspect}" if PT_DEBUG
          more = recursive_parse(rest, possible_tokens, limiter + 1)
          results = merge_results(results, token, more)
        else
          puts "#{" " * limiter}found none on this level, NOT parsing rest: #{rest.inspect}" if PT_DEBUG
          # This candidate does not fit here; continue after the current part.
          more = recursive_parse(array.slice(1..-1), possible_tokens, limiter + 1)
          results = merge_results(results, nil, more)
        end
      end

      if PT_DEBUG
        puts "#{" " * limiter}results:"
        results.each do |r|
          puts "#{" " * limiter} #{r.inspect}"
        end
      end

      results
    end

    # Appends to +results+ the combinations of +found+ (a token or nil) with
    # every result in +other+, and returns the de-duplicated list.
    def merge_results(results, found, other)
      if found.nil?
        results.concat(other)
      elsif other.empty?
        results.push([found])
      else
        other.each { |tail| results.push([found] + tail) }
      end
      results.map(&:uniq).uniq
    end

    # Tries to match +token+ against the beginning of +array+.
    #
    # @return [Array] [true, matched URL parts, remaining URL parts] on
    #   success, or [false, partially matched parts, array] on failure
    def try_match(token, array)
      found = []
      out = []
      rest = array.dup

      token.each do |token_part|
        url_part = rest.shift
        break if url_part.nil?

        if token_part == '*'
          # The wildcard accepts any URL part; keep what it matched.
          out.push(url_part)
          found.push(token_part)
        elsif token_part == url_part
          found.push(token_part)
          out.push(token_part)
        end
      end

      # The token matched only if every one of its parts was consumed.
      found == token ? [true, out, rest] : [false, out, array]
    end

    # Joins token parts back into a dash-separated code.
    def token2url(token)
      token.join('-')
    end

    # Splits a dash-separated URL or token code into its parts.
    def url2token(url)
      url.split('-')
    end
  end
end
@@ -0,0 +1,3 @@
1
module RsPathTokenizer
  # Release number of the gem.
  VERSION = '0.2.0'
end
@@ -0,0 +1,62 @@
1
+ require 'spec_helper'
2
+
3
describe RsPathTokenizer do
  # Token definitions shared by the examples; a fresh hash is built for
  # every example.
  let(:tokens_data) do
    {
      'balashiha' => ['r', 'balashiha'],
      'gorodskoj-okrug-balashiha-1' => ['r', 'balashiha-1'],
      'gorodskoj-okrug-balashiha-11' => ['r', 'balashiha-11'],
      'balashiha-gorodskoj-okrug' => ['r', 'balashiha-gorodskoj-okrug'],
      'gorodskoj-okrug-drugoi' => ['r', 'gorodskoj-okrug-drugoi'],
      'expensive' => ['sort', 'expensive']
    }
  end

  # Tokenizer built from the shared definitions.
  let(:tokenizer) { RsPathTokenizer::Tokenizer.new(tokens_data) }

  it 'shows best results when tokens overlaps' do
    expected = {
      "balashiha" => ["r", "balashiha"],
      "gorodskoj-okrug-drugoi" => ["r", "gorodskoj-okrug-drugoi"]
    }

    expect(tokenizer.tokenize('balashiha-gorodskoj-okrug-drugoi')).to eq expected
  end

  it 'shows matched result' do
    expect(tokenizer.tokenize('gorodskoj-okrug-balashiha-1'))
      .to eq("gorodskoj-okrug-balashiha-1" => ["r", "balashiha-1"])

    expect(tokenizer.tokenize('gorodskoj-okrug-balashiha-11'))
      .to eq("gorodskoj-okrug-balashiha-11" => ["r", "balashiha-11"])
  end

  it 'returns price range' do
    # Wildcard tokens are added on top of the shared definitions.
    tokens_data['price-*-*'] = ["price", nil]
    tokens_data['price-*'] = ["price", nil]
    priced = RsPathTokenizer::Tokenizer.new(tokens_data)

    region = { "balashiha" => ["r", "balashiha"] }
    range = { 'price-*-*' => ["price", "0", "100"] }
    from = { 'price-*' => ["price", "100"] }
    sort = { "expensive" => ["sort", "expensive"] }

    expect(priced.tokenize('balashiha-price-0-100')).to eq region.merge(range)
    expect(priced.tokenize('balashiha-price-100')).to eq region.merge(from)
    expect(priced.tokenize('balashiha-price-0-100-expensive')).to eq region.merge(range).merge(sort)
    expect(priced.tokenize('balashiha-price-100-expensive')).to eq region.merge(from).merge(sort)
  end

  it 'nothing found' do
    expect(tokenizer.tokenize('incorrect-url')).to eq nil
  end

  it 'too long url' do
    expect { tokenizer.tokenize('a-b' * 501) }.to raise_error RsPathTokenizer::Error
  end

  it 'incorrect token' do
    wildcard_only = { '*' => ["all", true] }
    expect { RsPathTokenizer::Tokenizer.new(wildcard_only) }.to raise_error RsPathTokenizer::Error
  end
end
@@ -0,0 +1,2 @@
1
+ $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
2
+ require 'rs_path_tokenizer'
metadata ADDED
@@ -0,0 +1,109 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rs_path_tokenizer
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.0
5
+ platform: ruby
6
+ authors:
7
+ - glebtv
8
+ - Sergey Malykh
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2015-03-17 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: bundler
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - ">="
19
+ - !ruby/object:Gem::Version
20
+ version: '0'
21
+ type: :development
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ version: '0'
28
+ - !ruby/object:Gem::Dependency
29
+ name: rake
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: '0'
35
+ type: :development
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ version: '0'
42
+ - !ruby/object:Gem::Dependency
43
+ name: rspec
44
+ requirement: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ version: '0'
49
+ type: :development
50
+ prerelease: false
51
+ version_requirements: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: '0'
56
+ description: Tokenize path from predefined tokens.
57
+ email:
58
+ - xronos.i.am@gmail.com
59
+ executables:
60
+ - console
61
+ - setup
62
+ extensions: []
63
+ extra_rdoc_files: []
64
+ files:
65
+ - ".editorconfig"
66
+ - ".gitignore"
67
+ - ".rspec"
68
+ - ".ruby-gemset"
69
+ - ".ruby-version"
70
+ - ".travis.yml"
71
+ - Gemfile
72
+ - LICENSE.txt
73
+ - README.md
74
+ - Rakefile
75
+ - bin/console
76
+ - bin/setup
77
+ - lib/rs_path_tokenizer.rb
78
+ - lib/rs_path_tokenizer/error.rb
79
+ - lib/rs_path_tokenizer/tokenizer.rb
80
+ - lib/rs_path_tokenizer/version.rb
81
+ - spec/rs_path_tokenizer_spec.rb
82
+ - spec/spec_helper.rb
83
+ homepage: http://github.com/xronos-i-am/rs_path_tokenizer
84
+ licenses:
85
+ - MIT
86
+ metadata: {}
87
+ post_install_message:
88
+ rdoc_options: []
89
+ require_paths:
90
+ - lib
91
+ required_ruby_version: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - ">="
94
+ - !ruby/object:Gem::Version
95
+ version: '0'
96
+ required_rubygems_version: !ruby/object:Gem::Requirement
97
+ requirements:
98
+ - - ">="
99
+ - !ruby/object:Gem::Version
100
+ version: '0'
101
+ requirements: []
102
+ rubyforge_project:
103
+ rubygems_version: 2.4.6
104
+ signing_key:
105
+ specification_version: 4
106
+ summary: URL path tokenizer.
107
+ test_files:
108
+ - spec/rs_path_tokenizer_spec.rb
109
+ - spec/spec_helper.rb