blurrily 0.0.1 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -42,6 +42,12 @@ int blurrily_storage_load(trigram_map* haystack, const char* path);
42
42
  */
43
43
  int blurrily_storage_close(trigram_map* haystack);
44
44
 
45
+ /*
46
+ Mark resources managed by Ruby GC.
47
+ */
48
+ void blurrily_storage_mark(trigram_map haystack);
49
+
50
+
45
51
  /*
46
52
  Persist to disk what <blurrily_storage_new> or <blurrily_storage_open>
47
53
  gave you.
@@ -3,7 +3,6 @@
3
3
  #include <stdio.h>
4
4
  #include <math.h>
5
5
  #include "tokeniser.h"
6
- #include "log.h"
7
6
  #include "blurrily.h"
8
7
 
9
8
 
@@ -59,19 +58,19 @@ static int blurrily_compare_trigrams(const void* left_p, const void* right_p)
59
58
 
60
59
  int blurrily_tokeniser_parse_string(const char* input, trigram_t* output)
61
60
  {
62
- int length = strlen(input);
63
- char* normalized = (char*) malloc(length+5);
64
- int duplicates = 0;
61
+ size_t length = strlen(input);
62
+ char* normalized = (char*) malloc(length+5);
63
+ size_t duplicates = 0;
65
64
 
66
65
  snprintf(normalized, length+4, "**%s*", input);
67
66
 
68
67
  /* replace spaces with '*' */
69
- for (int k = 0; k < length+3; ++k) {
68
+ for (size_t k = 0; k < length+3; ++k) {
70
69
  if (normalized[k] == ' ') normalized[k] = '*';
71
70
  }
72
71
 
73
72
  /* compute trigrams */
74
- for (int k = 0; k <= length; ++k) {
73
+ for (size_t k = 0; k <= length; ++k) {
75
74
  string_to_code(normalized+k, output+k);
76
75
  }
77
76
 
@@ -79,7 +78,7 @@ int blurrily_tokeniser_parse_string(const char* input, trigram_t* output)
79
78
  LOG("-- normalization\n");
80
79
  LOG("%s -> %s\n", input, normalized);
81
80
  LOG("-- tokenisation\n");
82
- for (int k = 0; k <= length; ++k) {
81
+ for (size_t k = 0; k <= length; ++k) {
83
82
  char res[4];
84
83
 
85
84
  code_to_string(output[k], res);
@@ -94,7 +93,7 @@ int blurrily_tokeniser_parse_string(const char* input, trigram_t* output)
94
93
  qsort((void*)output, length+1, sizeof(trigram_t), &blurrily_compare_trigrams);
95
94
 
96
95
  /* remove duplicates */
97
- for (int k = 1; k <= length; ++k) {
96
+ for (size_t k = 1; k <= length; ++k) {
98
97
  trigram_t* previous = output + k - 1;
99
98
  trigram_t* current = output + k;
100
99
 
@@ -109,14 +108,14 @@ int blurrily_tokeniser_parse_string(const char* input, trigram_t* output)
109
108
 
110
109
  /* print again */
111
110
  LOG("-- after sort/compact\n");
112
- for (int k = 0; k <= length-duplicates; ++k) {
111
+ for (size_t k = 0; k <= length-duplicates; ++k) {
113
112
  char res[4];
114
113
  code_to_string(output[k], res);
115
114
  LOG("%d -> %s\n", output[k], res);
116
115
  }
117
116
 
118
117
  free((void*)normalized);
119
- return length+1 - duplicates;
118
+ return (int) (length + 1 - duplicates);
120
119
  }
121
120
 
122
121
  /******************************************************************************/
@@ -1,3 +1 @@
1
- require "blurrily/map_ext"
2
- require "blurrily/map"
3
- require "blurrily/version"
1
+ require 'blurrily/version'
@@ -0,0 +1,129 @@
1
+ # encoding: utf-8
2
+
3
+ require 'socket'
4
+ require 'ipaddr'
5
+ require 'blurrily/defaults'
6
+
7
+ module Blurrily
8
+ class Client
9
+ Error = Class.new(RuntimeError)
10
+
11
+ # Public: Initialize a new Blurrily::Client connection to Blurrily::Server.
12
+ #
13
+ # host - IP Address or FQDN of the Blurrily::Server.
14
+ # Defaults to Blurrily::DEFAULT_HOST.
15
+ # port - Port Blurrily::Server is listening on.
16
+ # Defaults to Blurrily::DEFAULT_PORT.
17
+ # db_name - Name of the data store being targeted.
18
+ # Defaults to Blurrily::DEFAULT_DATABASE.
19
+ #
20
+ # Examples
21
+ #
22
+ # Blurrily::Client.new(:host => '127.0.0.1', :port => 12021, :db_name => 'location_en')
23
+ # # => #<Blurrily::Client:0x007fcd0d33e708 @host="127.0.0.1", @port=12021, @db_name="location_en">
24
+ #
25
+ # Returns the instance of Blurrily::Client
26
+ def initialize(options = {})
27
+ @host = options.fetch(:host, DEFAULT_HOST)
28
+ @port = options.fetch(:port, DEFAULT_PORT)
29
+ @db_name = options.fetch(:db_name, DEFAULT_DATABASE)
30
+ end
31
+
32
+ # Public: Find record references based on a given string (needle)
33
+ #
34
+ # needle - The string you're searching for matches on.
35
+ # Must not contain tabs.
36
+ # Required
37
+ # limit - Limit the number of results returned (default: 10).
38
+ # MUST be numeric.
39
+ # Optional
40
+ #
41
+ # Examples
42
+ #
43
+ # @client.find('London')
44
+ # # => [[123,6,3],[124,5,3]...]
45
+ #
46
+ # Returns an Array of matching [REF,SCORE,WEIGHT] ordered by score. REF is the identifying value of the original record.
47
+ def find(needle, limit = nil)
48
+ limit ||= LIMIT_DEFAULT
49
+ check_valid_needle(needle)
50
+ raise(ArgumentError, "LIMIT value must be in #{LIMIT_RANGE}") unless LIMIT_RANGE.include?(limit)
51
+
52
+ cmd = ["FIND", @db_name, needle, limit]
53
+ send_cmd_and_get_results(cmd).map(&:to_i)
54
+ end
55
+
56
+ # Public: Index a given record.
57
+ #
58
+ # db_name - The name of the data store being targeted. Required
59
+ # needle - The string you wish to index. Must not contain tabs. Required
60
+ # ref - The identifying value of the record being indexed. Must be numeric. Required
61
+ # weight - Weight of this particular reference. Default 0. Don't change unless you know what you're doing. Optional.
62
+ #
63
+ # Examples
64
+ #
65
+ # @client.put('London', 123, 0)
66
+ # # => OK
67
+ #
68
+ # Returns something to let you know that all is well.
69
+ def put(needle, ref, weight = 0)
70
+ check_valid_needle(needle)
71
+ check_valid_ref(ref)
72
+ raise(ArgumentError, "WEIGHT value must be in #{WEIGHT_RANGE}") unless WEIGHT_RANGE.include?(weight)
73
+
74
+ cmd = ["PUT", @db_name, needle, ref, weight]
75
+ send_cmd_and_get_results(cmd)
76
+ return
77
+ end
78
+
79
+ def delete(ref)
80
+ check_valid_ref(ref)
81
+ cmd = ['DELETE', @db_name, ref]
82
+ send_cmd_and_get_results(cmd)
83
+ return
84
+ end
85
+
86
+ def clear()
87
+ send_cmd_and_get_results(['CLEAR', @db_name])
88
+ return
89
+ end
90
+
91
+
92
+ private
93
+
94
+
95
+ PORT_RANGE = 1025..32768
96
+
97
+ def check_valid_needle(needle)
98
+ raise(ArgumentError, "bad needle") if !needle.kind_of?(String) || needle.empty? || needle.include?("\t")
99
+ end
100
+
101
+ def check_valid_ref(ref)
102
+ raise(ArgumentError, "REF value must be in #{REF_RANGE}") unless REF_RANGE.include?(ref)
103
+ end
104
+
105
+
106
+ def connection
107
+ @connection ||= TCPSocket.new(@host, @port)
108
+ end
109
+
110
+ def send_cmd_and_get_results(argv)
111
+ output = argv.join("\t")
112
+ connection.puts output
113
+ input = connection.gets
114
+ case input
115
+ when "OK\n"
116
+ return []
117
+ when /^OK\t(.*)\n/
118
+ return $1.split("\t")
119
+ when /^ERROR\t(.*)\n/
120
+ raise Error, $1
121
+ when nil
122
+ raise Error, 'Server disconnected'
123
+ else
124
+ raise Error, 'Server did not respect protocol'
125
+ end
126
+ end
127
+
128
+ end
129
+ end
@@ -0,0 +1,54 @@
1
+ # encoding: utf-8
2
+ require 'blurrily/defaults'
3
+
4
+ module Blurrily
5
+ class CommandProcessor
6
+ ProtocolError = Class.new(StandardError)
7
+
8
+ def initialize(map_group)
9
+ @map_group = map_group
10
+ end
11
+
12
+ def process_command(line)
13
+ command, map_name, *args = line.split(/\t/)
14
+ raise ProtocolError, 'Unknown command' unless COMMANDS.include? command
15
+ raise ProtocolError, 'Invalid database name' unless map_name =~ /^[a-z_]+$/
16
+ result = send("on_#{command}", map_name, *args)
17
+ ['OK', *result].compact.join("\t")
18
+ rescue ArgumentError, ProtocolError => e
19
+ ['ERROR', e.message].join("\t")
20
+ end
21
+
22
+ private
23
+
24
+ COMMANDS = %w(FIND PUT DELETE CLEAR)
25
+
26
+ def on_PUT(map_name, needle, ref, weight = nil)
27
+ raise ProtocolError, 'Invalid reference' unless ref =~ /^\d+$/ && REF_RANGE.include?(ref.to_i)
28
+ raise ProtocolError, 'Invalid weight' unless weight.nil? || (weight =~ /^\d+$/ && WEIGHT_RANGE.include?(weight.to_i))
29
+
30
+ @map_group.map(map_name).put(*[needle, ref.to_i, weight.to_i].compact)
31
+ return
32
+ end
33
+
34
+ def on_DELETE(map_name, ref)
35
+ raise ProtocolError, 'Invalid reference' unless ref =~ /^\d+$/ && REF_RANGE.include?(ref.to_i)
36
+
37
+ @map_group.map(map_name).delete(ref.to_i)
38
+ return
39
+ end
40
+
41
+ def on_FIND(map_name, needle, limit = nil)
42
+ raise ProtocolError, 'Limit must be a number' if limit && !LIMIT_RANGE.include?(limit.to_i)
43
+
44
+ results = @map_group.map(map_name).find(*[needle, limit && limit.to_i].compact)
45
+ refs = results.map{ |result| result.first }
46
+ return refs
47
+ end
48
+
49
+ def on_CLEAR(map_name)
50
+ @map_group.clear(map_name)
51
+ return
52
+ end
53
+ end
54
+ end
@@ -0,0 +1,10 @@
1
+ module Blurrily
2
+ DEFAULT_HOST = 'localhost'
3
+ DEFAULT_PORT = 12021
4
+ DEFAULT_DATABASE = 'words'
5
+
6
+ LIMIT_DEFAULT = 10
7
+ LIMIT_RANGE = 1..1024
8
+ REF_RANGE = 1..(1<<31)
9
+ WEIGHT_RANGE = 0..(1<<31)
10
+ end
@@ -1,10 +1,12 @@
1
1
  require 'blurrily/map_ext'
2
- require 'active_support/all' # fixme: we only need enough to get mb_chars and alias_method_chain in
2
+ require 'active_support/core_ext/module/aliasing' # alias_method_chain
3
+ require 'active_support/core_ext/string/multibyte' # mb_chars
3
4
 
4
5
  module Blurrily
5
6
  Map.class_eval do
6
7
 
7
- def put_with_string_normalize(needle, reference, weight=0)
8
+ def put_with_string_normalize(needle, reference, weight=nil)
9
+ weight ||= 0
8
10
  needle = normalize_string needle
9
11
  put_without_string_normalize(needle, reference, weight)
10
12
  end
@@ -25,7 +27,8 @@ module Blurrily
25
27
  def normalize_string(needle)
26
28
  result = needle.downcase
27
29
  unless result =~ /^([a-z ])+$/
28
- result = result.mb_chars.normalize(:kd).gsub(/[^\x00-\x7F]/,'').to_s.gsub(/[^a-z]/,' ')
30
+ result = ActiveSupport::Multibyte::Chars.new(result).mb_chars.normalize(:kd).gsub(/[^\x00-\x7F]/,'').to_s.gsub(/[^a-z]/,' ')
31
+ # result = result.mb_chars.normalize(:kd).gsub(/[^\x00-\x7F]/,'').to_s.gsub(/[^a-z]/,' ')
29
32
  end
30
33
  result.gsub(/\s+/,' ').strip
31
34
  end
@@ -0,0 +1,39 @@
1
+ require 'pathname'
2
+ require 'blurrily/map'
3
+
4
+ module Blurrily
5
+ class MapGroup
6
+
7
+ def initialize(directory = nil)
8
+ @directory = Pathname.new(directory || Dir.pwd)
9
+ @maps = {}
10
+ end
11
+
12
+ def map(name)
13
+ @maps[name] ||= load_map(name) || Map.new
14
+ end
15
+
16
+ def save
17
+ @directory.mkpath
18
+ @maps.each do |name, map|
19
+ map.save(path_for(name).to_s)
20
+ end
21
+ end
22
+
23
+ def clear(name)
24
+ @maps[name] = Map.new
25
+ end
26
+
27
+ private
28
+
29
+ def load_map(name)
30
+ Map.load(path_for(name).to_s)
31
+ rescue Errno::ENOENT
32
+ nil
33
+ end
34
+
35
+ def path_for(name)
36
+ @directory.join("#{name}.trigrams")
37
+ end
38
+ end
39
+ end
@@ -0,0 +1,49 @@
1
+ require 'eventmachine'
2
+ require 'blurrily/defaults'
3
+ require 'blurrily/command_processor'
4
+ require 'blurrily/map_group'
5
+
6
+ module Blurrily
7
+ class Server
8
+
9
+ def initialize(options)
10
+ @host = options.fetch(:host, '0.0.0.0')
11
+ @port = options.fetch(:port, Blurrily::DEFAULT_PORT)
12
+ directory = options.fetch(:directory, Dir.pwd)
13
+
14
+ @map_group = MapGroup.new(directory)
15
+ @command_processor = CommandProcessor.new(@map_group)
16
+ end
17
+
18
+ def start
19
+ EventMachine.run do
20
+ # hit Control + C to stop
21
+ Signal.trap("INT") { EventMachine.stop }
22
+ Signal.trap("TERM") { EventMachine.stop }
23
+
24
+ saver = proc { @map_group.save }
25
+ EventMachine.add_periodic_timer(60, &saver)
26
+ EventMachine.add_shutdown_hook(&saver)
27
+ Signal.trap("USR1", &saver)
28
+
29
+ EventMachine.start_server(@host, @port, Handler, @command_processor)
30
+ end
31
+ end
32
+
33
+ private
34
+
35
+ module Handler
36
+ def initialize(processor)
37
+ @processor = processor
38
+ end
39
+
40
+ def receive_data(data)
41
+ data.split("\n").each do |line|
42
+ output = @processor.process_command(line.strip)
43
+ output << "\n"
44
+ send_data(output)
45
+ end
46
+ end
47
+ end
48
+ end
49
+ end
@@ -1,3 +1,3 @@
1
1
  module Blurrily
2
- VERSION = "0.0.1"
2
+ VERSION = "0.1.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: blurrily
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Julien Letessier
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-03-27 00:00:00.000000000 Z
11
+ date: 2013-04-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -150,28 +150,105 @@ dependencies:
150
150
  - - '>='
151
151
  - !ruby/object:Gem::Version
152
152
  version: '0'
153
+ - !ruby/object:Gem::Dependency
154
+ name: benchmark-ips
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - '>='
158
+ - !ruby/object:Gem::Version
159
+ version: '0'
160
+ type: :development
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - '>='
165
+ - !ruby/object:Gem::Version
166
+ version: '0'
167
+ - !ruby/object:Gem::Dependency
168
+ name: guard
169
+ requirement: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - '>='
172
+ - !ruby/object:Gem::Version
173
+ version: '0'
174
+ type: :development
175
+ prerelease: false
176
+ version_requirements: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - '>='
179
+ - !ruby/object:Gem::Version
180
+ version: '0'
181
+ - !ruby/object:Gem::Dependency
182
+ name: guard-rspec
183
+ requirement: !ruby/object:Gem::Requirement
184
+ requirements:
185
+ - - '>='
186
+ - !ruby/object:Gem::Version
187
+ version: '0'
188
+ type: :development
189
+ prerelease: false
190
+ version_requirements: !ruby/object:Gem::Requirement
191
+ requirements:
192
+ - - '>='
193
+ - !ruby/object:Gem::Version
194
+ version: '0'
195
+ - !ruby/object:Gem::Dependency
196
+ name: rb-fsevent
197
+ requirement: !ruby/object:Gem::Requirement
198
+ requirements:
199
+ - - '>='
200
+ - !ruby/object:Gem::Version
201
+ version: '0'
202
+ type: :development
203
+ prerelease: false
204
+ version_requirements: !ruby/object:Gem::Requirement
205
+ requirements:
206
+ - - '>='
207
+ - !ruby/object:Gem::Version
208
+ version: '0'
209
+ - !ruby/object:Gem::Dependency
210
+ name: terminal-notifier-guard
211
+ requirement: !ruby/object:Gem::Requirement
212
+ requirements:
213
+ - - '>='
214
+ - !ruby/object:Gem::Version
215
+ version: '0'
216
+ type: :development
217
+ prerelease: false
218
+ version_requirements: !ruby/object:Gem::Requirement
219
+ requirements:
220
+ - - '>='
221
+ - !ruby/object:Gem::Version
222
+ version: '0'
153
223
  description: Native fuzzy string search
154
224
  email:
155
225
  - julien.letessier@gmail.com
156
- executables: []
226
+ executables:
227
+ - blurrily
157
228
  extensions:
158
229
  - ext/blurrily/extconf.rb
159
230
  extra_rdoc_files: []
160
231
  files:
232
+ - lib/blurrily/client.rb
233
+ - lib/blurrily/command_processor.rb
234
+ - lib/blurrily/defaults.rb
161
235
  - lib/blurrily/map.rb
236
+ - lib/blurrily/map_group.rb
162
237
  - lib/blurrily/server.rb
163
238
  - lib/blurrily/version.rb
164
239
  - lib/blurrily.rb
165
240
  - ext/blurrily/map_ext.c
241
+ - ext/blurrily/search_tree.c
166
242
  - ext/blurrily/storage.c
167
243
  - ext/blurrily/tokeniser.c
168
244
  - ext/blurrily/blurrily.h
169
- - ext/blurrily/log.h
245
+ - ext/blurrily/search_tree.h
170
246
  - ext/blurrily/storage.h
171
247
  - ext/blurrily/tokeniser.h
172
248
  - ext/blurrily/extconf.rb
173
249
  - README.md
174
250
  - LICENSE.txt
251
+ - bin/blurrily
175
252
  homepage: http://github.com/mezis/blurrily
176
253
  licenses: []
177
254
  metadata: {}