neoscout 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/etc/neo4j.yml ADDED
@@ -0,0 +1,16 @@
1
+ neostore.nodestore.db.mapped_memory: 400M
2
+ neostore.relationshipstore.db.mapped_memory: 1G
3
+ neostore.propertystore.db.mapped_memory: 4G
4
+ neostore.propertystore.db.index.mapped_memory: 50M
5
+ neostore.propertystore.db.index.keys.mapped_memory: 50M
6
+ neostore.propertystore.db.strings.mapped_memory: 500M
7
+ neostore.propertystore.db.arrays.mapped_memory: 500M
8
+
9
+ # Keeping the defaults here for reference
10
+ # neostore.nodestore.db.mapped_memory: 25M
11
+ # neostore.relationshipstore.db.mapped_memory: 50M
12
+ # neostore.propertystore.db.mapped_memory: 90M
13
+ # neostore.propertystore.db.index.mapped_memory: 1M
14
+ # neostore.propertystore.db.index.keys.mapped_memory: 1M
15
+ # neostore.propertystore.db.strings.mapped_memory: 130M
16
+ # neostore.propertystore.db.arrays.mapped_memory: 130M
data/lib/neoscout.rb ADDED
@@ -0,0 +1,12 @@
1
+ require 'json'
2
+ require 'neo4j'
3
+ require 'set'
4
+
5
+ require 'neoscout/version'
6
+
7
+ require 'neoscout/tools'
8
+ require 'neoscout/constraints'
9
+ require 'neoscout/model'
10
+ require 'neoscout/json_schema'
11
+ require 'neoscout/scout'
12
+ require 'neoscout/gdb_neo4j'
@@ -0,0 +1,35 @@
1
module NeoScout

  module Constraints

    # Abstract base class of all constraints that can be checked against
    # graph elements. Subclasses implement #satisfied_by?.
    class Constraint
      # Checks this constraint against a node; delegates to #satisfied_by?.
      def satisfied_by_node?(typer, node)
        satisfied_by?(typer, node)
      end

      # Checks this constraint against an edge; delegates to #satisfied_by?.
      def satisfied_by_edge?(typer, edge)
        satisfied_by?(typer, edge)
      end

      # Must be overridden by subclasses.
      #
      # @raise [NotImplementedError] always, in this base class
      def satisfied_by?(typer, obj)
        raise NotImplementedError
      end
    end

    # Constraint describing a single named property with an optional flag and
    # an optional value type.
    class PropConstraint < Constraint
      attr_reader :name, :opt, :type

      # @param args [Hash] reads :name (required, non-empty String),
      #   :opt (truthy if the property is optional) and :type (may be nil)
      # @raise [ArgumentError] if :name is not a non-empty String
      def initialize(args = {})
        # Constraint defines no initializer of its own, so no arguments may be
        # forwarded: Object#initialize rejects them with an ArgumentError.
        super()
        @name = args[:name]
        @opt  = args[:opt]
        @type = args[:type]

        raise ArgumentError, 'property name must be a String' unless @name.kind_of? String
        raise ArgumentError, 'property name must not be empty' unless @name.length > 0
      end

      # Renders the constraint as "<name>[: <type>][ (opt.)]".
      def to_s
        opt_s  = if @opt then " (opt.)" else '' end
        type_s = if @type then ": #{@type}" else '' end

        "#{@name}#{type_s}#{opt_s}"
      end
    end

  end
end
@@ -0,0 +1,147 @@
1
+ module NeoScout
2
+
3
+ module GDB_Neo4j
4
+
5
module Constraints

  # Property constraint that is evaluated against Neo4j graph elements.
  class PropConstraint < NeoScout::Constraints::PropConstraint

    # Checks the named property of +obj+.
    #
    # If the element does not carry the property at all, the result is the
    # constraint's +opt+ flag (a missing property is acceptable only when it is
    # optional). Otherwise the stored value is checked against the type.
    def satisfied_by?(typer, obj)
      return self.opt unless obj.property?(@name)

      satisfies_type?(typer, @type, obj[@name])
    end

    # Returns true when no type is required; otherwise returns whatever
    # +typer.valid_value?+ answers for the given type and value.
    def satisfies_type?(typer, type, value)
      !type || typer.valid_value?(type, value)
    end
  end

end
22
+
23
# Determines type names for Neo4j nodes and edges.
#
# Nodes are typed by the value of the property named by +type_attr+, edges by
# their relationship type. Elements without type information are assigned
# +nil_type+. The optional +node_mapper+ / +edge_mapper+ callables are applied
# to every type name that was found.
class Typer < NeoScout::Typer

  attr_accessor :type_attr
  attr_accessor :nil_type
  attr_accessor :value_type_table
  attr_accessor :node_mapper
  attr_accessor :edge_mapper

  include TyperValueTableMixin

  # Installs the defaults: '_classname' as type property, '__NOTYPE__' as the
  # type of untyped elements, no mappers and an empty value type table.
  def initialize
    @value_type_table = {}
    @edge_mapper      = nil
    @node_mapper      = nil
    @nil_type         = '__NOTYPE__'
    @type_attr        = '_classname'
  end

  # Returns the (mapped) type of +node+, or the nil type if the node has no
  # type property.
  def node_type(node)
    props = node.props
    if props.has_key? @type_attr
      node_mapped(props[@type_attr])
    else
      @nil_type
    end
  end

  # Returns the (mapped) relationship type of +edge+ as a String, or the nil
  # type if the edge reports no relationship type.
  def edge_type(edge)
    type = edge.rel_type
    return @nil_type unless type

    edge_mapped(type.to_s)
  end

  # A type is "checked" when it is anything but the nil type.
  def checked_node_type?(node_type)
    node_type != self.nil_type
  end

  def checked_edge_type?(edge_type)
    edge_type != self.nil_type
  end

  # A type is "unknown" when it equals the nil type.
  def unknown_node_type?(type)
    type == @nil_type
  end

  def unknown_edge_type?(type)
    type == @nil_type
  end

  protected

  # Applies the node mapper to +t+ if one is configured.
  def node_mapped(t)
    mapper = self.node_mapper
    mapper ? mapper.call(t) : t
  end

  # Applies the edge mapper to +t+ if one is configured.
  def edge_mapped(t)
    mapper = self.edge_mapper
    mapper ? mapper.call(t) : t
  end

end
69
+
70
# Walks over all nodes / relationships of the running Neo4j database.
#
# Both iterators accept an optional callable under +args[:report_progress]+
# which is invoked as +call(:progress, what, num)+ for every element seen and
# once as +call(:finish, what, num)+ at the end. Both return the number of
# elements seen.
class ElementIterator < NeoScout::ElementIterator

  # Yields every node except the one with id 0. The skipped node is still
  # counted and reported.
  def iter_nodes(args)
    report    = args[:report_progress] || lambda { |mode, what, num| }
    graph_ops = org.neo4j.tooling.GlobalGraphOperations.at(Neo4j.db.graph)
    nodes     = graph_ops.getAllNodes.iterator
    seen      = 0

    while nodes.hasNext
      current = nodes.next
      seen += 1
      report.call(:progress, :nodes, seen)
      next if current.getId == 0

      yield current
    end

    report.call(:finish, :nodes, seen)
    seen
  end

  # Yields every relationship of the database.
  def iter_edges(args)
    report    = args[:report_progress] || lambda { |mode, what, num| }
    graph_ops = org.neo4j.tooling.GlobalGraphOperations.at(Neo4j.db.graph)
    edges     = graph_ops.getAllRelationships.iterator
    seen      = 0

    while edges.hasNext
      seen += 1
      report.call(:progress, :edges, seen)
      yield edges.next
    end

    report.call(:finish, :edges, seen)
    seen
  end

end
108
+
109
# Verifier that builds Neo4j specific property constraints.
class Verifier < NeoScout::Verifier
  # Remembers the typer after running the parent initializer without
  # arguments.
  def initialize(typer)
    super()
    @typer = typer
  end

  # Factory for node property constraints; +args+ is handed to
  # Constraints::PropConstraint.new unchanged.
  def new_node_prop_constr(args={})
    Constraints::PropConstraint.new(args)
  end

  # Factory for edge property constraints; same constraint class as for nodes.
  def new_edge_prop_constr(args={})
    Constraints::PropConstraint.new(args)
  end

  # Pure pass-through to the parent implementation.
  def init_from_json(json)
    super
  end
end
127
+
128
# Scout preconfigured with the Neo4j typer, verifier and element iterator.
class Scout < NeoScout::Scout

  # Fills in :typer, :verifier and :iterator in +args+ when the caller did not
  # supply them (the hash is modified in place) and hands it to the parent.
  def initialize(args={})
    args[:typer]    ||= Typer.new
    args[:verifier] ||= Verifier.new(args[:typer])
    args[:iterator] ||= ElementIterator.new
    super(args)
  end

  # Ensure __NOTYPE__ entries always have a counts array.
  #
  # NOTE(review): the bare lookups below only have an effect if typed_nodes /
  # typed_edges create their entries on first access - confirm in the model.
  def prep_counts(counts)
    untyped = typer.nil_type
    counts.typed_nodes[untyped]
    counts.typed_edges[untyped]
    counts
  end
end
144
+
145
+ end
146
+
147
+ end
@@ -0,0 +1,136 @@
1
+ module NeoScout
2
+
3
# JSON export for Counter (the class itself is defined elsewhere).
class Counter

  # Returns a plain Hash (not a JSON string) holding the failed and total
  # numbers under the keys 'num_failed' and 'num_total'.
  def to_json
    failed = num_failed
    total  = num_total
    { 'num_failed' => failed, 'num_total' => total }
  end

end
10
+
11
+
12
+ # TODO indexes
13
+ # TODO edges
14
+ # TODO testing
15
+
16
# Schema import for Verifier (the class itself is defined elsewhere).
class Verifier

  # Reads the property constraints and the allowed edge end points from a
  # parsed schema.
  #
  # For every entry below 'nodes' and 'connections', each property becomes a
  # constraint that is optional unless its 'relevant' entry is truthy. For
  # connections the 'sources' and 'targets' entries (empty list when the key is
  # absent) are registered through add_valid_edge_sets.
  #
  # NOTE(review): JSON.cd is not part of the json library; presumably it is a
  # project helper that descends into nested hashes - confirm.
  def init_from_json(json)
    node_types = JSON.cd(json, %w(nodes))
    node_types.each_pair do |type_name, type_json|
      JSON.cd(type_json, %w(properties)).each_pair do |prop_name, prop_json|
        constraint = new_node_prop_constr(name: prop_name, opt: !prop_json['relevant'])
        self.node_props[type_name] << constraint
      end
    end

    edge_types = JSON.cd(json, %w(connections))
    edge_types.each_pair do |type_name, type_json|
      JSON.cd(type_json, %w(properties)).each_pair do |prop_name, prop_json|
        constraint = new_edge_prop_constr(name: prop_name, opt: !prop_json['relevant'])
        self.edge_props[type_name] << constraint
      end

      sources = type_json.has_key?('sources') ? type_json['sources'] : []
      targets = type_json.has_key?('targets') ? type_json['targets'] : []
      add_valid_edge_sets(type_name, sources, targets)
    end
  end

end
41
+
42
+
43
# JSON export for HashWithDefault (the class itself is defined elsewhere).
class HashWithDefault

  # Returns the result of map_value with every value replaced by its own
  # to_json representation.
  def to_json
    self.map_value do |value|
      value.to_json
    end
  end
end
49
+
50
+ #noinspection RubyTooManyInstanceVariablesInspection
51
# JSON export for Counts (the class itself is defined elsewhere).
#
# NOTE(review): JSON.cd is not part of the json library; presumably it is a
# project helper returning the nested hash found at the given key path -
# confirm.
class Counts

  # Writes all collected counts into the schema hash +json+:
  # overall counts below 'all', per type counts and per property counts below
  # 'nodes' / 'connections', followed by the link statistics.
  def add_to_json(json)
    all_json = JSON.cd json, %w(all)
    all_json['node_counts']       = @all_nodes.to_json
    all_json['connection_counts'] = @all_edges.to_json

    nodes_json = JSON.cd(json, %w(nodes))
    @typed_nodes.each_pair do |type, count|
      skip = skip_from_json(:node, type, count)
      JSON.cd(nodes_json, [type])['counts'] = count.to_json unless skip
    end

    nodes_json = JSON.cd(json, %w(nodes))
    @typed_node_props.each_pair do |type, props|
      props.each_pair do |name, count|
        skip = skip_from_json(:node, type, count)
        JSON.cd(nodes_json, [type, 'properties', name])['counts'] = count.to_json unless skip
      end
    end

    edges_json = JSON.cd(json, %w(connections))
    @typed_edges.each_pair do |type, count|
      skip = skip_from_json(:edge, type, count)
      JSON.cd(edges_json, [type])['counts'] = count.to_json unless skip
    end

    edges_json = JSON.cd(json, %w(connections))
    @typed_edge_props.each_pair do |type, props|
      props.each_pair do |name, count|
        skip = skip_from_json(:edge, type, count)
        JSON.cd(edges_json, [type, 'properties', name])['counts'] = count.to_json unless skip
      end
    end

    add_link_stats_to_json(json)
  end

  # Decides whether a count is left out of the report: only counts that are
  # empty AND belong to the unknown (untyped) type are skipped.
  #
  # The previous implementation evaluated `false unless count.empty?` without
  # returning it, so non-empty counts of untyped elements were dropped too.
  #
  # @param kind [Symbol] :node or :edge
  # @param type the type name the count belongs to
  # @param count object answering +empty?+
  # @raise [ArgumentError] if +kind+ is neither :node nor :edge
  def skip_from_json(kind, type, count)
    unknown =
      case kind
      when :node
        @typer.unknown_node_type?(type)
      when :edge
        @typer.unknown_edge_type?(type)
      else
        raise ArgumentError
      end

    count.empty? && unknown ? true : false
  end

  protected

  # Adds the 'src_stats' / 'dst_stats' entries of every node type and every
  # connection type to +json+.
  def add_link_stats_to_json(json)
    nodes_json = JSON.cd(json, %w(nodes))

    @node_link_src_stats.each_pair do |type, hash|
      JSON.cd(nodes_json, [type])['src_stats'] = hash.to_json
    end

    @node_link_dst_stats.each_pair do |type, hash|
      JSON.cd(nodes_json, [type])['dst_stats'] = hash.to_json
    end

    edges_json = JSON.cd(json, %w(connections))
    @edge_link_src_stats.each_pair do |type, hash|
      JSON.cd(edges_json, [type])['src_stats'] = hash_to_json_array('to_dst', hash)
    end

    @edge_link_dst_stats.each_pair do |type, hash|
      JSON.cd(edges_json, [type])['dst_stats'] = hash_to_json_array('from_src', hash)
    end
  end

  # Flattens a two level hash into an array of
  #   { 'name' => outer_key, target => [{ 'name' => inner_key, 'counts' => ... }] }
  # entries. Only each_pair is used, so any hash-like object works.
  def hash_to_json_array(target, hash)
    result = []
    hash.each_pair do |key, value|
      inner = []
      value.each_pair do |i_key, i_value|
        inner << { 'name' => i_key, 'counts' => i_value.to_json }
      end
      result << { 'name' => key, target => inner }
    end
    result
  end
end
136
+ end
@@ -0,0 +1,205 @@
1
+ require 'java'
2
+ require 'yaml'
3
+ require 'optparse'
4
+ require 'json'
5
+ require 'httparty'
6
+ require 'date'
7
+ require 'active_model'
8
+ require 'active_support/inflector'
9
+
10
+ module NeoScout
11
+
12
# Command line driver of NeoScout.
#
# Parses the options from ARGV on construction, opens the database and then
# either verifies it once and prints / stores the resulting JSON (standalone
# mode) or serves the verification over HTTP through Sinatra (webservice
# mode).
class Main
  attr_reader :opt_db
  attr_reader :opt_schema
  attr_reader :opt_port
  attr_reader :opt_webservice
  attr_reader :opt_bind
  attr_reader :opt_report
  attr_reader :opt_no_nodes
  attr_reader :opt_db_config
  attr_reader :opt_no_edges
  attr_reader :opt_type_mapper
  attr_reader :opt_pre_mapper
  attr_reader :opt_output_file

  # Sets the option defaults and parses ARGV (see #parse_opts).
  def initialize
    @opt_db          = nil
    @opt_schema      = nil
    @opt_port        = nil
    @opt_bind        = nil
    @opt_report      = 0
    @opt_webservice  = false
    @opt_output_file = nil
    @opt_no_nodes    = false
    @opt_db_config   = nil
    @opt_no_edges    = false
    @opt_pre_mapper  = lambda { |t| t }
    @opt_type_mapper = lambda { |t| t }
    parse_opts
  end

  # Parses ARGV destructively and stores the results in the opt_* attributes.
  # The schema options store a callable so that the schema is re-read on every
  # use.
  #
  # @raise [ArgumentError] for an unsupported --type-mapper value
  def parse_opts
    optparse = OptionParser.new do |opts|
      # The schema is given through --schema-url or --schema-file; a plain
      # "--schema" is ambiguous between the two and rejected by OptionParser.
      opts.banner = "Usage: --db <neo4j:path> (--schema-url <url> | --schema-file <file>) [--port <port>]"
      opts.on('-d', '--db DB', 'Path to database in the form neo4j:<path>)') do |db|
        @opt_db = db
      end
      opts.on('-u', '--schema-url URL', 'URL to database schema') do |url|
        @opt_schema = lambda { || ::JSON.parse(HTTParty.get(url)) }
      end
      opts.on('-s', '--schema-file FILE', 'schema file') do |file|
        @opt_schema = lambda { || ::JSON.parse(IO::read(file)) }
      end
      opts.on('-o', '--output-file FILE', 'output file in standalone mode') do |f|
        @opt_output_file = f
      end
      opts.on('-w', '--webservice', 'Run inside sinatra') do
        @opt_webservice = true
      end
      opts.on('-p', '--port PORT', 'Port to be used') do |port|
        @opt_port = port.to_i
      end
      opts.on('-b', '--bind ITF', 'Interface to be used') do |itf|
        # Was stored in @port, which nothing reads; #run_webservice and the
        # opt_bind reader use @opt_bind.
        @opt_bind = itf
      end
      opts.on('-r', '--report NUM', 'Report progress every NUM graph elements') do |num|
        @opt_report = num.to_i
      end
      opts.on('--no-nodes', 'Do not iterate over nodes') do
        @opt_no_nodes = true
      end
      opts.on('--no-edges', 'Do not iterate over edges') do
        @opt_no_edges = true
      end
      opts.on('-P', '--pluralize-types', 'Pluralize type names') do
        @opt_pre_mapper = lambda { |t| t.pluralize }
      end
      opts.on('-S', '--singularize-types', 'Singularize type names') do
        @opt_pre_mapper = lambda { |t| t.singularize }
      end
      opts.on('-M', '--type-mapper MAPPER',
              'Set the type mapper (underscore, downcase, upcase)') do |mapper|
        @opt_type_mapper = case mapper
                           when 'underscore'
                             lambda { |t| t.underscore }
                           when 'downcase'
                             lambda { |t| t.downcase }
                           when 'upcase'
                             lambda { |t| t.upcase }
                           else
                             raise ArgumentError, 'Unsupported mapper'
                           end
      end
      opts.on('-C', '--db-config FILE', 'Set config file for db driver') do |file|
        @opt_db_config = file
      end
      opts.on('-h', '--help', 'Display this screen') do
        puts opts
        exit 1
      end
    end
    optparse.parse!
    # The output file is only meaningful in standalone mode.
    @opt_output_file = nil if @opt_webservice
  end

  # Configures and starts the Neo4j database named by --db.
  #
  # @return [Proc] callable creating a fresh scout whose typer applies the
  #   configured pre mapper followed by the type mapper to all type names
  # @raise [ArgumentError] if --db is missing or not of the form neo4j:<path>
  def start_db
    path = neo4j_storage_path
    Neo4j.config[:storage_path] = path unless path.length == 0
    YAML.load_file(@opt_db_config).each_pair { |k, v| Neo4j.config[k] = v } if @opt_db_config
    Neo4j.start

    lambda do
      scout      = ::NeoScout::GDB_Neo4j::Scout.new
      pre_mapper = self.opt_pre_mapper
      scout.typer.node_mapper = lambda { |t| self.opt_type_mapper.call(pre_mapper.call(t)) }
      scout.typer.edge_mapper = lambda { |t| self.opt_type_mapper.call(pre_mapper.call(t)) }
      scout
    end
  end

  # Shuts the Neo4j database down.
  #
  # @raise [ArgumentError] if --db is missing or not of the form neo4j:<path>
  def shutdown_db
    neo4j_storage_path
    Neo4j.shutdown
    nil
  end

  # Logger used in standalone mode: every message sent to it is printed as
  # "<method name>: <arguments>" on standard output.
  class SimpleConsoleLogger
    def method_missing(key, *args)
      print key
      print ': '
      puts(*args)
    end
  end

  # Entry point: runs the webservice if requested, otherwise verifies the
  # database once and writes the JSON report to --output-file or to standard
  # output.
  #
  # @raise [ArgumentError] if no schema option was given
  def run
    raise ArgumentError, 'No schema given (use --schema-url or --schema-file)' unless self.opt_schema

    ### Load schema at least once to know that we're safe
    self.opt_schema.call()
    ### Run as service if requested
    return run_webservice(self.opt_schema, self.start_db) if self.opt_webservice

    scout_maker = self.start_db
    begin
      json = run_standalone(self.opt_schema, scout_maker, SimpleConsoleLogger.new)
      if self.opt_output_file
        File.open(self.opt_output_file, 'w') { |f| f.write(json) }
      else
        puts(json)
      end
    ensure
      # Release the database even when the verification run fails.
      shutdown_db
    end
    nil
  end

  # Verifies the database once.
  #
  # @param schema_maker [#call] returns the parsed schema
  # @param scout_maker [#call] returns the scout to use
  # @param logger [#info, nil] receives progress messages; a
  #   SimpleConsoleLogger is used when nil
  # @return [String] the schema, extended with all counts, as JSON
  def run_standalone(schema_maker, scout_maker, logger)
    schema = schema_maker.call()
    scout  = scout_maker.call()
    scout.verifier.init_from_json schema
    counts = scout.new_counts
    logger ||= SimpleConsoleLogger.new
    progress = lambda do |mode, what, num|
      # A report interval of 0 (the default) means "no intermediate reports";
      # without this guard num % 0 raises ZeroDivisionError.
      due = (self.opt_report != 0) && ((num % self.opt_report) == 0)
      if due || (mode == :finish)
        logger.info("#{DateTime.now}: #{what} ITERATOR PROGRESS (#{mode} / #{num})")
      end
    end
    scout.count_edges counts: counts, report_progress: progress unless self.opt_no_edges
    scout.count_nodes counts: counts, report_progress: progress unless self.opt_no_nodes
    counts.add_to_json schema
    schema.to_json
  end

  # Serves /schema, /verify and /shutdown through Sinatra.
  #
  # NOTE(review): relies on the Sinatra DSL (set, get) being callable from
  # inside this instance method - confirm against the Sinatra version in use.
  def run_webservice(schema_maker, scout_maker)
    ### Run sinatra (loaded lazily, only webservice mode needs it)
    require 'sinatra'

    set :port, @opt_port if @opt_port
    set :bind, @opt_bind if @opt_bind
    set :show_exceptions, true
    set :sessions, false
    set :logging, true
    set :dump_errors, true
    set :lock, true # -- really?
    set :root, File.expand_path("../../root", __FILE__)
    set :run, true
    # set :public_folder

    ### Keep self around for calling helpers in sinatra handlers
    main = self

    ### Return schema
    get '/schema' do
      content_type :json
      schema_maker.call().to_json
    end

    ### Run verify over database and report results
    get '/verify' do
      content_type :json
      # self is the object Sinatra evaluates the handler in, not Main
      main.run_standalone(schema_maker, scout_maker, self.logger)
    end

    ### Shutdown server, the hard way
    get '/shutdown' do
      main.shutdown_db
      java.lang.System.exit(0)
    end
  end

  private

  # Returns the path part of a "neo4j:<path>" database specification.
  #
  # @raise [ArgumentError] if --db was not given or names another database type
  def neo4j_storage_path
    match = /(neo4j:)(.*)/.match(@opt_db.to_s)
    raise ArgumentError, "Unsupported database type" unless match

    match[2]
  end
end
204
+
205
+ end