lexster 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
# Rake entry point for the gem: pulls in the Bundler gem tasks, the RSpec rake
# task class and the tasks shipped with Neography.
require 'bundler/gem_tasks'
require 'rspec/core/rake_task'
require 'neography/tasks'

# Register the RSpec rake task under the name :spec.
RSpec::Core::RakeTask.new(:spec)

# A bare `rake` invocation depends on (and therefore runs) :spec.
task :default => :spec
data/TODO.md ADDED
@@ -0,0 +1,4 @@
1
+ # Lexster - To Do
2
+
3
+ * Execute queries/scripts from model and not Neography (e.g. `Movie.neo_gremlin(gremlin_query)` with query that outputs IDs, returns a list of `Movie`s)
4
+ * Rake task to index all nodes and relationships in Neo4j
@@ -0,0 +1,28 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "lexster/version"
4
+
5
# Gem specification for lexster.
#
# The file lists and executables are taken from what git tracks, so the gem
# must be built from inside a git checkout.
Gem::Specification.new do |s|
  s.name        = "lexster"
  s.version     = Lexster::VERSION
  # One array element per author; the previous single comma-joined string was
  # treated by RubyGems as one author with a comma in the name.
  s.authors     = ["Nelson Wittwer", "Elad Ossadon"]
  s.email       = ["nelsonwittwer@gmail.com"]
  s.homepage    = ""
  s.summary     = %q{Titan for ActiveRecord}
  s.description = %q{Extend Ruby on Rails ActiveRecord with Titan nodes. Keep RDBMS and utilize the power of Gremlin queries. Fork of Neoid.}

  s.rubyforge_project = "lexster"

  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  # Executables are referenced by base name only (relative to bin/).
  s.executables   = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  s.require_paths = ["lib"]

  s.add_development_dependency 'rake'
  s.add_development_dependency 'rspec'
  s.add_development_dependency 'rest-client'
  s.add_development_dependency 'activerecord'
  s.add_development_dependency 'sqlite3'

  s.add_runtime_dependency 'neography'
end
@@ -0,0 +1,239 @@
1
+ require 'neography'
2
+ require 'lexster/version'
3
+ require 'lexster/config'
4
+ require 'lexster/model_config'
5
+ require 'lexster/model_additions'
6
+ require 'lexster/search_session'
7
+ require 'lexster/node'
8
+ require 'lexster/relationship'
9
+ require 'lexster/batch'
10
+ require 'lexster/database_cleaner'
11
+ require 'lexster/railtie' if defined?(Rails)
12
+
13
# Entry point and process-wide state for Lexster.
#
# Everything is exposed on the module's singleton: the database handle (`db`),
# the logger, the configuration object, the registries of node and
# relationship models, the per-thread enabled flag, batching and fulltext
# search.
module Lexster
  DEFAULT_FULLTEXT_SEARCH_INDEX_NAME = :lexster_default_search_index
  NODE_AUTO_INDEX_NAME = 'node_auto_index'
  RELATIONSHIP_AUTO_INDEX_NAME = 'relationship_auto_index'
  UNIQUE_ID_KEY = 'lexster_unique_id'

  class << self
    # Only writers are generated for these: the matching readers are written
    # by hand below (they validate or lazily build the value), and generating
    # them here as well would only trigger "method redefined" warnings.
    attr_writer :db, :logger, :ref_node
    attr_accessor :env_loaded

    # Registry (a plain Array) of node model classes.
    def node_models
      @node_models ||= []
    end

    # Registry (a plain Array) of relationship model classes.
    def relationship_models
      @relationship_models ||= []
    end

    # Lazily built Lexster::Config. Defaults: subrefs enabled, per-model
    # indexes disabled.
    def config
      @config ||= begin
        c = Lexster::Config.new

        # default
        c.enable_subrefs = true
        c.enable_per_model_indexes = false

        c
      end
    end

    # Yields the configuration object so callers can adjust it in a block.
    def configure
      yield config
    end

    # Marks the environment as loaded, then initializes the registered
    # relationship models and the server-side state.
    def initialize_all
      @env_loaded = true
      logger.info "Lexster initialize_all"
      initialize_relationships
      initialize_server
    end

    # Sets up auto indexes, subref nodes and per-model indexes (the latter two
    # only when enabled in the config).
    def initialize_server
      initialize_auto_index
      initialize_subrefs
      initialize_per_model_indexes
    end

    # The database handle assigned through `Lexster.db=`.
    #
    # Raises RuntimeError when no handle has been assigned yet.
    def db
      raise "Must set Lexster.db with a Neography::Rest instance" unless @db
      @db
    end

    # Runs the given block inside a Lexster::Batch and returns the value of
    # Batch#run.
    def batch(options={}, &block)
      Lexster::Batch.new(options, &block).run
    end

    # Lazily built Logger. The environment variables keep their NEOID_ names:
    # when NEOID_LOG is set, output goes to NEOID_LOG_FILE (or $stdout if that
    # is unset); otherwise it is discarded. File::NULL is used instead of a
    # hard-coded '/dev/null' so this also works where that path does not exist.
    def logger
      @logger ||= Logger.new(ENV['NEOID_LOG'] ? ENV['NEOID_LOG_FILE'] || $stdout : File::NULL)
    end

    # Memoized Neography node loaded from the 'self' entry of `db.get_root`.
    def ref_node
      @ref_node ||= Neography::Node.load(Lexster.db.get_root['self'])
    end

    # Re-initializes the cached subref nodes of every node model.
    def reset_cached_variables
      initialize_subrefs
    end

    # Cleans the graph database through Lexster::NeoDatabaseCleaner.
    #
    # The call is refused (a hint is printed and nil returned) unless `confirm`
    # is exactly :yes_i_am_sure. The previous `puts ... and return` form never
    # returned, because `puts` yields nil and `and` short-circuits, so the
    # database was cleaned even without confirmation.
    def clean_db(confirm)
      unless confirm == :yes_i_am_sure
        puts "must call with confirm: Lexster.clean_db(:yes_i_am_sure)"
        return
      end

      Lexster::NeoDatabaseCleaner.clean_db
    end

    # Sets the enabled flag for the current thread only.
    def enabled=(flag)
      Thread.current[:lexster_enabled] = flag
    end

    # Enabled flag for the current thread.
    def enabled
      flag = Thread.current[:lexster_enabled]
      # flag should be set by the middleware. in case it wasn't (non-rails app or console), default it to true
      flag.nil? ? true : flag
    end
    alias enabled? enabled

    # Sets the enabled flag to `flag` for the duration of the block and
    # restores the previous value afterwards, even if the block raises.
    def use(flag=true)
      old, self.enabled = enabled?, flag
      yield if block_given?
    ensure
      self.enabled = old
    end

    # Inside a running batch the script is queued on the current batch and the
    # value of `batch <<` is returned; the optional block is not invoked on
    # this path. Outside a batch the script is executed immediately, the
    # optional block may transform the value, and the value is wrapped in a
    # BatchPromiseProxy so both paths can be chained in the same way.
    def execute_script_or_add_to_batch(gremlin_query, script_vars)
      if Lexster::Batch.current_batch
        # returns a SingleResultPromiseProxy!
        Lexster::Batch.current_batch << [:execute_script, gremlin_query, script_vars]
      else
        value = Lexster.db.execute_script(gremlin_query, script_vars)

        value = yield(value) if block_given?

        Lexster::BatchPromiseProxy.new(value)
      end
    end

    # create a fulltext index if not exists
    # NOTE(review): the lookup uses the Symbol constant as key; confirm that
    # `list_node_indexes` returns symbol keys, otherwise the index is
    # re-created on every call.
    def ensure_default_fulltext_search_index
      Lexster.db.create_node_index(DEFAULT_FULLTEXT_SEARCH_INDEX_NAME, 'fulltext', 'lucene') unless (indexes = Lexster.db.list_node_indexes) && indexes[DEFAULT_FULLTEXT_SEARCH_INDEX_NAME]
    end

    # Queries the default fulltext index.
    #
    # types   - one model class or an Array of them; each must respond to
    #           `name` and `lexster_config`.
    # term    - a String (matched against every fulltext field of each type)
    #           or a Hash of field => value (matched exactly per field).
    # options - :limit (default 15), :match_type ("AND"/"OR", default "AND"),
    #           :before_query / :after_query (raw Gremlin inserted around the
    #           generated query).
    #
    # Returns a SearchSession built from the script result and the types.
    def search(types, term, options = {})
      options = options.reverse_merge(limit: 15,match_type: "AND")

      types = [*types]

      query = []

      types.each do |type|
        query_for_type = []

        query_for_type << "ar_type:#{type.name}"

        case term
        when String
          search_in_fields = type.lexster_config.search_options.fulltext_fields.keys
          # a type without fulltext fields contributes nothing to the query
          next if search_in_fields.empty?
          query_for_type << search_in_fields.map{ |field| generate_field_query(field, term, true, options[:match_type]) }.join(" OR ")
        when Hash
          term.each do |field, value|
            query_for_type << generate_field_query(field, value, false)
          end
        end

        query << "(#{query_for_type.join(") AND (")})"
      end

      query = "(#{query.join(") OR (")})"

      logger.info "Lexster query #{query}"

      gremlin_query = <<-GREMLIN
        #{options[:before_query]}

        idx = g.getRawGraph().index().forNodes('#{DEFAULT_FULLTEXT_SEARCH_INDEX_NAME}')
        hits = idx.query('#{sanitize_query_for_gremlin(query)}')

        hits = #{options[:limit] ? "hits.take(#{options[:limit]})" : "hits"}

        #{options[:after_query]}
      GREMLIN

      logger.info "[NEOID] search:\n#{gremlin_query}"

      results = Lexster.db.execute_script(gremlin_query)

      SearchSession.new(results, *types)
    end

    private

    # Normalizes a single search word; currently this only lower-cases it.
    def sanitize_term(term)
      # TODO - case sensitive?
      term.downcase
    end

    # Escapes single quotes so the query can sit inside a single-quoted
    # Gremlin string literal.
    def sanitize_query_for_gremlin(query)
      # TODO - case sensitive?
      query.gsub("'", "\\\\'")
    end

    # Builds the query fragment for one field.
    #
    # `term` is split on whitespace and every word becomes `field:word` (or
    # `field_fulltext:word` when `fulltext` is true), joined with `match_type`.
    # Returns "" for a nil/empty term. Raises RuntimeError for a match_type
    # other than "AND" / "OR".
    def generate_field_query(field, term, fulltext = false, match_type = "AND")
      term = term.to_s if term
      return "" if term.nil? || term.empty?

      suffix = fulltext ? "_fulltext" : nil
      valid_match_types = %w( AND OR )
      # membership test instead of Array#delete, which mutated the whitelist
      # as a side effect of validating
      unless valid_match_types.include?(match_type)
        raise "Invalid match_type option. Valid values are #{valid_match_types.join(',')}"
      end

      "(" + term.split(/\s+/).reject(&:empty?).map{ |t| "#{field}#{suffix}:#{sanitize_term(t)}" }.join(" #{match_type} ") + ")"
    end

    # Hands every registered relationship model to
    # Relationship.initialize_relationship.
    def initialize_relationships
      logger.info "Lexster initialize_relationships"
      relationship_models.each do |rel_model|
        Relationship.initialize_relationship(rel_model)
      end
    end

    # Turns on node and relationship auto indexing and registers
    # UNIQUE_ID_KEY as an auto-indexed property for both.
    def initialize_auto_index
      logger.info "Lexster initialize_auto_index"
      Lexster.db.set_node_auto_index_status(true)
      Lexster.db.add_node_auto_index_property(UNIQUE_ID_KEY)

      Lexster.db.set_relationship_auto_index_status(true)
      Lexster.db.add_relationship_auto_index_property(UNIQUE_ID_KEY)
    end

    # Resets every node model's subref node, requests them all again inside
    # one batch and assigns the batch results back to the models in order.
    # Does nothing when subrefs are disabled in the config.
    def initialize_subrefs
      return unless config.enable_subrefs

      node_models.each do |klass|
        klass.reset_neo_subref_node
      end

      logger.info "Lexster initialize_subrefs"
      batch do
        node_models.each(&:neo_subref_node)
      end.then do |results|
        node_models.zip(results).each do |klass, subref|
          klass.neo_subref_node = subref
        end
      end
    end

    # Calls `neo_model_index` on every node model inside one batch. Does
    # nothing when per-model indexes are disabled in the config.
    def initialize_per_model_indexes
      return unless config.enable_per_model_indexes

      # the message used to be a copy of the one in initialize_subrefs
      logger.info "Lexster initialize_per_model_indexes"
      batch do
        node_models.each(&:neo_model_index)
      end
    end
  end
end
@@ -0,0 +1,168 @@
1
+ module Lexster
2
# Collects commands and sends them to `Lexster.db.batch` in chunks of at most
# `:batch_size` commands.
#
# Options (merged over `Batch.default_options`):
#   :batch_size          - maximum number of commands sent per request
#   :individual_promises - when truthy, `<<` returns a SingleResultPromiseProxy
#                          that receives the result of that single command
class Batch
  # Replaces the class-wide defaults used by every batch created afterwards.
  def self.default_options=(value)
    @default_options = value
  end

  # Instance-level writer kept for backward compatibility. It used to store
  # the value in an instance variable that nothing ever read; it now forwards
  # to the class-level defaults, which is what `default_options` reads.
  def default_options=(value)
    self.class.default_options = value
  end

  def self.default_options
    @default_options ||= { batch_size: 200, individual_promises: true }
  end

  # The batch currently running on this thread, or nil.
  def self.current_batch
    Thread.current[:lexster_current_batch]
  end

  def self.current_batch=(batch)
    Thread.current[:lexster_current_batch] = batch
  end

  def self.reset_current_batch
    Thread.current[:lexster_current_batch] = nil
  end

  # options - Hash of options, or a callable used as the block when no block
  #           is given.
  # block   - invoked by `run` with the batch as its argument.
  def initialize(options={}, &block)
    if options.respond_to?(:call) && !block
      block = options
      options = {}
    end

    # Plain Hash#merge: leaves the caller's hash untouched and does not rely
    # on ActiveSupport's reverse_merge!.
    @options = self.class.default_options.merge(options)
    @block = block
  end

  # Queues a command. Returns a SingleResultPromiseProxy when
  # :individual_promises is enabled, nil otherwise.
  #
  # A full queue is flushed *before* the new command is appended. Flushing
  # after appending (the previous behaviour) sent the command before its
  # promise had been registered, so that promise never got its result and
  # every later promise was paired with the wrong result. Flushing first also
  # guarantees that callers had the chance to attach `.then` to every promise
  # that is about to be resolved.
  def <<(command)
    flush_batch if commands.length >= @options[:batch_size]

    commands << command

    return unless @options[:individual_promises]

    promise = SingleResultPromiseProxy.new(command)
    thens << promise
    promise
  end

  # Commands queued since the last flush.
  def commands
    @commands ||= []
  end

  # Promises for the queued commands, in the same order as `commands`.
  def thens
    @thens ||= []
  end

  # Number of commands queued since the last flush.
  def count
    @commands ? @commands.count : 0
  end

  # Accumulated results of all flushes so far.
  def results
    @results ||= []
  end

  # Runs the block with this batch installed as the thread's current batch,
  # flushes whatever is still queued and returns a BatchPromiseProxy around
  # all results. The previously current batch (if any) is restored afterwards
  # so a nested batch does not silently end the enclosing one.
  def run
    previous_batch = self.class.current_batch
    self.class.current_batch = self

    begin
      @block.call(self)
    ensure
      self.class.current_batch = previous_batch
    end

    Lexster.logger.info "Lexster batch (#{commands.length} commands)"

    flush_batch

    BatchPromiseProxy.new(results)
  end

  private

  # Sends the queued commands, converts the results, resolves the pending
  # promises and appends the results to `results`.
  def flush_batch
    return [] if commands.empty?

    # Timed with the monotonic clock; Benchmark was used here before but is
    # never required by this file.
    started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    current_results = Lexster.db.batch(*commands).map { |result| result['body'] }
    elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at

    Lexster.logger.info "Lexster batch (#{commands.length} commands) - #{format('%.6fs', elapsed)}"
    commands.clear

    process_results(current_results)

    thens.zip(current_results).each { |promise, result| promise.perform(result) }

    thens.clear

    results.concat current_results
  end

  # Replaces, in place, every result hash whose 'self' URL points at a node or
  # a relationship with `Lexster::Node.from_hash` / `Lexster::Relationship
  # .from_hash` of that hash; anything else is left untouched.
  #
  # The block uses the case value / `next` rather than `return`: `return`
  # inside the block left the whole method at the first non-matching result
  # and skipped the conversion of everything after it.
  def process_results(results)
    results.map! do |result|
      self_url = result.is_a?(Hash) ? result['self'] : nil
      next result unless self_url.is_a?(String)

      match = %r{\Ahttps?://.*/(node|relationship)/\d+}.match(self_url)

      case match && match[1]
      when 'node' then Lexster::Node.from_hash(result)
      when 'relationship' then Lexster::Relationship.from_hash(result)
      else result
      end
    end
  end
end
117
+
118
# returned from a full batch, after it has been executed,
# so a `.then` can be chained after the batch do ... end
# it proxies all methods to the result
class BatchPromiseProxy
  def initialize(results)
    @results = results
  end

  # Forwards unknown calls to the wrapped results. The block is forwarded as
  # well; without it `proxy.each { ... }` or `proxy.map { ... }` silently
  # dropped the block and returned an Enumerator.
  def method_missing(method, *args, &block)
    @results.send(method, *args, &block)
  end

  # Keeps `respond_to?` in line with what `method_missing` can forward.
  def respond_to_missing?(method, include_private = false)
    @results.respond_to?(method, include_private) || super
  end

  # Yields the wrapped results and returns the block's value.
  def then
    yield(@results)
  end
end
134
+
135
# returned from adding (<<) an item to a batch in a batch block:
#   Lexster.batch { |batch| (batch << [:neography_command, param]).is_a?(SingleResultPromiseProxy) }
# so a `.then` can be chained:
#   Lexster.batch { |batch| (batch << [:neography_command, param]).then { |result| puts result } }
# the `then` is called once the batch is flushed with the result of the single job in the batch
# it proxies all methods to the result, so in case it is returned (like in Lexster.execute_script_or_add_to_batch)
# the result of the method will be proxied to the result from the batch. See Node#neo_save
class SingleResultPromiseProxy
  # Arguments are accepted for compatibility with callers that pass the
  # command, but they are not used.
  def initialize(*args)
    @performed = false
  end

  # Stores the result and marks the promise as resolved.
  def result=(value)
    @performed = true
    @result = value
  end

  # The result of the single command.
  #
  # Raises RuntimeError when read before a result has been assigned.
  # Resolution is tracked with a separate flag, so a nil or false result is
  # returned as-is; the previous truthiness check on the result reported
  # "too soon" for such results forever.
  def result
    raise "Accessed result too soon" unless @performed
    @result
  end

  # Forwards unknown calls (including their block) to the result.
  def method_missing(method, *args, &block)
    result.send(method, *args, &block)
  end

  # Nothing can be forwarded before the promise has been resolved.
  def respond_to_missing?(method, include_private = false)
    (@performed && @result.respond_to?(method, include_private)) || super
  end

  # Registers the callback to run when the result arrives. Returns self so
  # the promise itself can be handed on.
  def then(&block)
    @then = block
    self
  end

  # Called when the batch is flushed: stores the result and runs the
  # registered callback, if any.
  def perform(result)
    self.result = result
    return unless @then
    @then.call(result)
  end
end
168
+ end