blurrily 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -42,6 +42,12 @@ int blurrily_storage_load(trigram_map* haystack, const char* path);
42
42
  */
43
43
  int blurrily_storage_close(trigram_map* haystack);
44
44
 
45
+ /*
46
+ Mark resources managed by Ruby GC.
47
+ */
48
+ void blurrily_storage_mark(trigram_map haystack);
49
+
50
+
45
51
  /*
46
52
  Persist to disk what <blurrily_storage_new> or <blurrily_storage_open>
47
53
  gave you.
@@ -3,7 +3,6 @@
3
3
  #include <stdio.h>
4
4
  #include <math.h>
5
5
  #include "tokeniser.h"
6
- #include "log.h"
7
6
  #include "blurrily.h"
8
7
 
9
8
 
@@ -59,19 +58,19 @@ static int blurrily_compare_trigrams(const void* left_p, const void* right_p)
59
58
 
60
59
  int blurrily_tokeniser_parse_string(const char* input, trigram_t* output)
61
60
  {
62
- int length = strlen(input);
63
- char* normalized = (char*) malloc(length+5);
64
- int duplicates = 0;
61
+ size_t length = strlen(input);
62
+ char* normalized = (char*) malloc(length+5);
63
+ size_t duplicates = 0;
65
64
 
66
65
  snprintf(normalized, length+4, "**%s*", input);
67
66
 
68
67
  /* replace spaces with '*' */
69
- for (int k = 0; k < length+3; ++k) {
68
+ for (size_t k = 0; k < length+3; ++k) {
70
69
  if (normalized[k] == ' ') normalized[k] = '*';
71
70
  }
72
71
 
73
72
  /* compute trigrams */
74
- for (int k = 0; k <= length; ++k) {
73
+ for (size_t k = 0; k <= length; ++k) {
75
74
  string_to_code(normalized+k, output+k);
76
75
  }
77
76
 
@@ -79,7 +78,7 @@ int blurrily_tokeniser_parse_string(const char* input, trigram_t* output)
79
78
  LOG("-- normalization\n");
80
79
  LOG("%s -> %s\n", input, normalized);
81
80
  LOG("-- tokenisation\n");
82
- for (int k = 0; k <= length; ++k) {
81
+ for (size_t k = 0; k <= length; ++k) {
83
82
  char res[4];
84
83
 
85
84
  code_to_string(output[k], res);
@@ -94,7 +93,7 @@ int blurrily_tokeniser_parse_string(const char* input, trigram_t* output)
94
93
  qsort((void*)output, length+1, sizeof(trigram_t), &blurrily_compare_trigrams);
95
94
 
96
95
  /* remove duplicates */
97
- for (int k = 1; k <= length; ++k) {
96
+ for (size_t k = 1; k <= length; ++k) {
98
97
  trigram_t* previous = output + k - 1;
99
98
  trigram_t* current = output + k;
100
99
 
@@ -109,14 +108,14 @@ int blurrily_tokeniser_parse_string(const char* input, trigram_t* output)
109
108
 
110
109
  /* print again */
111
110
  LOG("-- after sort/compact\n");
112
- for (int k = 0; k <= length-duplicates; ++k) {
111
+ for (size_t k = 0; k <= length-duplicates; ++k) {
113
112
  char res[4];
114
113
  code_to_string(output[k], res);
115
114
  LOG("%d -> %s\n", output[k], res);
116
115
  }
117
116
 
118
117
  free((void*)normalized);
119
- return length+1 - duplicates;
118
+ return (int) (length + 1 - duplicates);
120
119
  }
121
120
 
122
121
  /******************************************************************************/
@@ -1,3 +1 @@
1
- require "blurrily/map_ext"
2
- require "blurrily/map"
3
- require "blurrily/version"
1
+ require 'blurrily/version'
@@ -0,0 +1,129 @@
1
+ # encoding: utf-8
2
+
3
+ require 'socket'
4
+ require 'ipaddr'
5
+ require 'blurrily/defaults'
6
+
7
+ module Blurrily
8
+ class Client
9
+ Error = Class.new(RuntimeError)
10
+
11
+ # Public: Initialize a new Blurrily::Client connection to Blurrily::Server.
12
+ #
13
+ # host - IP Address or FQDN of the Blurrily::Server.
14
+ # Defaults to Blurrily::DEFAULT_HOST.
15
+ # port - Port Blurrily::Server is listening on.
16
+ # Defaults to Blurrily::DEFAULT_PORT.
17
+ # db_name - Name of the data store being targeted.
18
+ # Defaults to Blurrily::DEFAULT_DATABASE.
19
+ #
20
+ # Examples
21
+ #
22
+ # Blurrily::Client.new(:host => '127.0.0.1', :port => 12021, :db_name => 'location_en')
23
+ # # => #<Blurrily::Client:0x007fcd0d33e708 @host="127.0.0.1", @port=12021, @db_name="location_en">
24
+ #
25
+ # Returns the instance of Blurrily::Client
26
+ def initialize(options = {})
27
+ @host = options.fetch(:host, DEFAULT_HOST)
28
+ @port = options.fetch(:port, DEFAULT_PORT)
29
+ @db_name = options.fetch(:db_name, DEFAULT_DATABASE)
30
+ end
31
+
32
+ # Public: Find record references based on a given string (needle)
33
+ #
34
+ # needle - The string you're searching for matches on.
35
+ # Must not contain tabs.
36
+ # Required
37
+ # limit - Limit the number of results returned (default: 10).
38
+ # MUST be numeric.
39
+ # Optional
40
+ #
41
+ # Examples
42
+ #
43
+ # @client.find('London')
44
+ # # => [123, 124, ...]
45
+ #
46
+ # Returns an Array of matching REFs (Integers). REF is the identifying value of the original record.
47
+ def find(needle, limit = nil)
48
+ limit ||= LIMIT_DEFAULT
49
+ check_valid_needle(needle)
50
+ raise(ArgumentError, "LIMIT value must be in #{LIMIT_RANGE}") unless LIMIT_RANGE.include?(limit)
51
+
52
+ cmd = ["FIND", @db_name, needle, limit]
53
+ send_cmd_and_get_results(cmd).map(&:to_i)
54
+ end
55
+
56
+ # Public: Index a given record.
57
+ #
58
+ # (The data store targeted is the db_name this client was initialized with; it is not an argument here.)
59
+ # needle - The string you wish to index. Must not contain tabs. Required
60
+ # ref - The identifying value of the record being indexed. Must be numeric. Required
61
+ # weight - Weight of this particular reference. Default 0. Don't change unless you know what you're doing. Optional.
62
+ #
63
+ # Examples
64
+ #
65
+ # @client.put('London', 123, 0)
66
+ # # => OK
67
+ #
68
+ # Returns nil on success; raises Blurrily::Client::Error if the server reports an error or disconnects.
69
+ def put(needle, ref, weight = 0)
70
+ check_valid_needle(needle)
71
+ check_valid_ref(ref)
72
+ raise(ArgumentError, "WEIGHT value must be in #{WEIGHT_RANGE}") unless WEIGHT_RANGE.include?(weight)
73
+
74
+ cmd = ["PUT", @db_name, needle, ref, weight]
75
+ send_cmd_and_get_results(cmd)
76
+ return
77
+ end
78
+
79
+ def delete(ref)
80
+ check_valid_ref(ref)
81
+ cmd = ['DELETE', @db_name, ref]
82
+ send_cmd_and_get_results(cmd)
83
+ return
84
+ end
85
+
86
+ def clear()
87
+ send_cmd_and_get_results(['CLEAR', @db_name])
88
+ return
89
+ end
90
+
91
+
92
+ private
93
+
94
+
95
+ PORT_RANGE = 1025..32768
96
+
97
+ def check_valid_needle(needle)
98
+ raise(ArgumentError, "bad needle") if !needle.kind_of?(String) || needle.empty? || needle.include?("\t")
99
+ end
100
+
101
+ def check_valid_ref(ref)
102
+ raise(ArgumentError, "REF value must be in #{REF_RANGE}") unless REF_RANGE.include?(ref)
103
+ end
104
+
105
+
106
+ def connection
107
+ @connection ||= TCPSocket.new(@host, @port)
108
+ end
109
+
110
+ def send_cmd_and_get_results(argv)
111
+ output = argv.join("\t")
112
+ connection.puts output
113
+ input = connection.gets
114
+ case input
115
+ when "OK\n"
116
+ return []
117
+ when /^OK\t(.*)\n/
118
+ return $1.split("\t")
119
+ when /^ERROR\t(.*)\n/
120
+ raise Error, $1
121
+ when nil
122
+ raise Error, 'Server disconnected'
123
+ else
124
+ raise Error, 'Server did not respect protocol'
125
+ end
126
+ end
127
+
128
+ end
129
+ end
@@ -0,0 +1,54 @@
1
+ # encoding: utf-8
2
+ require 'blurrily/defaults'
3
+
4
+ module Blurrily
5
+ class CommandProcessor
6
+ ProtocolError = Class.new(StandardError)
7
+
8
+ def initialize(map_group)
9
+ @map_group = map_group
10
+ end
11
+
12
+ def process_command(line)
13
+ command, map_name, *args = line.split(/\t/)
14
+ raise ProtocolError, 'Unknown command' unless COMMANDS.include? command
15
+ raise ProtocolError, 'Invalid database name' unless map_name =~ /^[a-z_]+$/
16
+ result = send("on_#{command}", map_name, *args)
17
+ ['OK', *result].compact.join("\t")
18
+ rescue ArgumentError, ProtocolError => e
19
+ ['ERROR', e.message].join("\t")
20
+ end
21
+
22
+ private
23
+
24
+ COMMANDS = %w(FIND PUT DELETE CLEAR)
25
+
26
+ def on_PUT(map_name, needle, ref, weight = nil)
27
+ raise ProtocolError, 'Invalid reference' unless ref =~ /^\d+$/ && REF_RANGE.include?(ref.to_i)
28
+ raise ProtocolError, 'Invalid weight' unless weight.nil? || (weight =~ /^\d+$/ && WEIGHT_RANGE.include?(weight.to_i))
29
+
30
+ @map_group.map(map_name).put(*[needle, ref.to_i, weight.to_i].compact)
31
+ return
32
+ end
33
+
34
+ def on_DELETE(map_name, ref)
35
+ raise ProtocolError, 'Invalid reference' unless ref =~ /^\d+$/ && REF_RANGE.include?(ref.to_i)
36
+
37
+ @map_group.map(map_name).delete(ref.to_i)
38
+ return
39
+ end
40
+
41
+ def on_FIND(map_name, needle, limit = nil)
42
+ raise ProtocolError, 'Limit must be a number' if limit && !LIMIT_RANGE.include?(limit.to_i)
43
+
44
+ results = @map_group.map(map_name).find(*[needle, limit && limit.to_i].compact)
45
+ refs = results.map{ |result| result.first }
46
+ return refs
47
+ end
48
+
49
+ def on_CLEAR(map_name)
50
+ @map_group.clear(map_name)
51
+ return
52
+ end
53
+ end
54
+ end
@@ -0,0 +1,10 @@
1
+ module Blurrily
2
+ DEFAULT_HOST = 'localhost'
3
+ DEFAULT_PORT = 12021
4
+ DEFAULT_DATABASE = 'words'
5
+
6
+ LIMIT_DEFAULT = 10
7
+ LIMIT_RANGE = 1..1024
8
+ REF_RANGE = 1..(1<<31)
9
+ WEIGHT_RANGE = 0..(1<<31)
10
+ end
@@ -1,10 +1,12 @@
1
1
  require 'blurrily/map_ext'
2
- require 'active_support/all' # fixme: we only need enough to get mb_chars and alias_method_chain in
2
+ require 'active_support/core_ext/module/aliasing' # alias_method_chain
3
+ require 'active_support/core_ext/string/multibyte' # mb_chars
3
4
 
4
5
  module Blurrily
5
6
  Map.class_eval do
6
7
 
7
- def put_with_string_normalize(needle, reference, weight=0)
8
+ def put_with_string_normalize(needle, reference, weight=nil)
9
+ weight ||= 0
8
10
  needle = normalize_string needle
9
11
  put_without_string_normalize(needle, reference, weight)
10
12
  end
@@ -25,7 +27,8 @@ module Blurrily
25
27
  def normalize_string(needle)
26
28
  result = needle.downcase
27
29
  unless result =~ /^([a-z ])+$/
28
- result = result.mb_chars.normalize(:kd).gsub(/[^\x00-\x7F]/,'').to_s.gsub(/[^a-z]/,' ')
30
+ result = ActiveSupport::Multibyte::Chars.new(result).mb_chars.normalize(:kd).gsub(/[^\x00-\x7F]/,'').to_s.gsub(/[^a-z]/,' ')
31
+ # result = result.mb_chars.normalize(:kd).gsub(/[^\x00-\x7F]/,'').to_s.gsub(/[^a-z]/,' ')
29
32
  end
30
33
  result.gsub(/\s+/,' ').strip
31
34
  end
@@ -0,0 +1,39 @@
1
+ require 'pathname'
2
+ require 'blurrily/map'
3
+
4
+ module Blurrily
5
+ class MapGroup
6
+
7
+ def initialize(directory = nil)
8
+ @directory = Pathname.new(directory || Dir.pwd)
9
+ @maps = {}
10
+ end
11
+
12
+ def map(name)
13
+ @maps[name] ||= load_map(name) || Map.new
14
+ end
15
+
16
+ def save
17
+ @directory.mkpath
18
+ @maps.each do |name, map|
19
+ map.save(path_for(name).to_s)
20
+ end
21
+ end
22
+
23
+ def clear(name)
24
+ @maps[name] = Map.new
25
+ end
26
+
27
+ private
28
+
29
+ def load_map(name)
30
+ Map.load(path_for(name).to_s)
31
+ rescue Errno::ENOENT
32
+ nil
33
+ end
34
+
35
+ def path_for(name)
36
+ @directory.join("#{name}.trigrams")
37
+ end
38
+ end
39
+ end
@@ -0,0 +1,49 @@
1
+ require 'eventmachine'
2
+ require 'blurrily/defaults'
3
+ require 'blurrily/command_processor'
4
+ require 'blurrily/map_group'
5
+
6
+ module Blurrily
7
+ class Server
8
+
9
+ def initialize(options)
10
+ @host = options.fetch(:host, '0.0.0.0')
11
+ @port = options.fetch(:port, Blurrily::DEFAULT_PORT)
12
+ directory = options.fetch(:directory, Dir.pwd)
13
+
14
+ @map_group = MapGroup.new(directory)
15
+ @command_processor = CommandProcessor.new(@map_group)
16
+ end
17
+
18
+ def start
19
+ EventMachine.run do
20
+ # hit Control + C to stop
21
+ Signal.trap("INT") { EventMachine.stop }
22
+ Signal.trap("TERM") { EventMachine.stop }
23
+
24
+ saver = proc { @map_group.save }
25
+ EventMachine.add_periodic_timer(60, &saver)
26
+ EventMachine.add_shutdown_hook(&saver)
27
+ Signal.trap("USR1", &saver)
28
+
29
+ EventMachine.start_server(@host, @port, Handler, @command_processor)
30
+ end
31
+ end
32
+
33
+ private
34
+
35
+ module Handler
36
+ def initialize(processor)
37
+ @processor = processor
38
+ end
39
+
40
+ def receive_data(data)
41
+ data.split("\n").each do |line|
42
+ output = @processor.process_command(line.strip)
43
+ output << "\n"
44
+ send_data(output)
45
+ end
46
+ end
47
+ end
48
+ end
49
+ end
@@ -1,3 +1,3 @@
1
1
  module Blurrily
2
- VERSION = "0.0.1"
2
+ VERSION = "0.1.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: blurrily
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Julien Letessier
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-03-27 00:00:00.000000000 Z
11
+ date: 2013-04-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -150,28 +150,105 @@ dependencies:
150
150
  - - '>='
151
151
  - !ruby/object:Gem::Version
152
152
  version: '0'
153
+ - !ruby/object:Gem::Dependency
154
+ name: benchmark-ips
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - '>='
158
+ - !ruby/object:Gem::Version
159
+ version: '0'
160
+ type: :development
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - '>='
165
+ - !ruby/object:Gem::Version
166
+ version: '0'
167
+ - !ruby/object:Gem::Dependency
168
+ name: guard
169
+ requirement: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - '>='
172
+ - !ruby/object:Gem::Version
173
+ version: '0'
174
+ type: :development
175
+ prerelease: false
176
+ version_requirements: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - '>='
179
+ - !ruby/object:Gem::Version
180
+ version: '0'
181
+ - !ruby/object:Gem::Dependency
182
+ name: guard-rspec
183
+ requirement: !ruby/object:Gem::Requirement
184
+ requirements:
185
+ - - '>='
186
+ - !ruby/object:Gem::Version
187
+ version: '0'
188
+ type: :development
189
+ prerelease: false
190
+ version_requirements: !ruby/object:Gem::Requirement
191
+ requirements:
192
+ - - '>='
193
+ - !ruby/object:Gem::Version
194
+ version: '0'
195
+ - !ruby/object:Gem::Dependency
196
+ name: rb-fsevent
197
+ requirement: !ruby/object:Gem::Requirement
198
+ requirements:
199
+ - - '>='
200
+ - !ruby/object:Gem::Version
201
+ version: '0'
202
+ type: :development
203
+ prerelease: false
204
+ version_requirements: !ruby/object:Gem::Requirement
205
+ requirements:
206
+ - - '>='
207
+ - !ruby/object:Gem::Version
208
+ version: '0'
209
+ - !ruby/object:Gem::Dependency
210
+ name: terminal-notifier-guard
211
+ requirement: !ruby/object:Gem::Requirement
212
+ requirements:
213
+ - - '>='
214
+ - !ruby/object:Gem::Version
215
+ version: '0'
216
+ type: :development
217
+ prerelease: false
218
+ version_requirements: !ruby/object:Gem::Requirement
219
+ requirements:
220
+ - - '>='
221
+ - !ruby/object:Gem::Version
222
+ version: '0'
153
223
  description: Native fuzzy string search
154
224
  email:
155
225
  - julien.letessier@gmail.com
156
- executables: []
226
+ executables:
227
+ - blurrily
157
228
  extensions:
158
229
  - ext/blurrily/extconf.rb
159
230
  extra_rdoc_files: []
160
231
  files:
232
+ - lib/blurrily/client.rb
233
+ - lib/blurrily/command_processor.rb
234
+ - lib/blurrily/defaults.rb
161
235
  - lib/blurrily/map.rb
236
+ - lib/blurrily/map_group.rb
162
237
  - lib/blurrily/server.rb
163
238
  - lib/blurrily/version.rb
164
239
  - lib/blurrily.rb
165
240
  - ext/blurrily/map_ext.c
241
+ - ext/blurrily/search_tree.c
166
242
  - ext/blurrily/storage.c
167
243
  - ext/blurrily/tokeniser.c
168
244
  - ext/blurrily/blurrily.h
169
- - ext/blurrily/log.h
245
+ - ext/blurrily/search_tree.h
170
246
  - ext/blurrily/storage.h
171
247
  - ext/blurrily/tokeniser.h
172
248
  - ext/blurrily/extconf.rb
173
249
  - README.md
174
250
  - LICENSE.txt
251
+ - bin/blurrily
175
252
  homepage: http://github.com/mezis/blurrily
176
253
  licenses: []
177
254
  metadata: {}