neoscout 0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,9 @@
1
+ .idea
2
+ .bundle
3
+ doc
4
+ db
5
+ pkg
6
+ html
7
+ package
8
+ coverage
9
+ bin
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --colour
3
+ --tty
data/.rvmrc ADDED
@@ -0,0 +1,2 @@
1
+ rvm --create use jruby-1.6.7@neoscout
2
+ export JRUBY_OPTS="$JRUBY_OPTS -Xcompat.version=1.9"
data/AUTHORS ADDED
@@ -0,0 +1 @@
1
+ Stefan Plantikow <stefanp@moviepilot.com>
data/Gemfile ADDED
@@ -0,0 +1,21 @@
1
+ source "http://rubygems.org"
2
+
3
+ gem 'sinatra'
4
+ gem 'httparty'
5
+ gem 'json', '>=1.6.5'
6
+ gem 'activemodel', '3.1.3'
7
+ gem 'activesupport', '3.1.3'
8
+
9
+ group :development do
10
+ gem 'rake'
11
+ gem 'irbtools'
12
+ end
13
+
14
+ group :test do
15
+ gem 'rspec', '2.6.0'
16
+ gem 'simplecov'
17
+ end
18
+
19
+ platforms :jruby do
20
+ gem 'neo4j', '1.3.1'
21
+ end
data/Gemfile.lock ADDED
@@ -0,0 +1,138 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ actionpack (3.1.3)
5
+ activemodel (= 3.1.3)
6
+ activesupport (= 3.1.3)
7
+ builder (~> 3.0.0)
8
+ erubis (~> 2.7.0)
9
+ i18n (~> 0.6)
10
+ rack (~> 1.3.5)
11
+ rack-cache (~> 1.1)
12
+ rack-mount (~> 0.8.2)
13
+ rack-test (~> 0.6.1)
14
+ sprockets (~> 2.0.3)
15
+ activemodel (3.1.3)
16
+ activesupport (= 3.1.3)
17
+ builder (~> 3.0.0)
18
+ i18n (~> 0.6)
19
+ activesupport (3.1.3)
20
+ multi_json (~> 1.0)
21
+ awesome_print (1.0.2)
22
+ boson (1.1.0)
23
+ builder (3.0.0)
24
+ clipboard (1.0.1)
25
+ coderay (1.0.5)
26
+ diff-lcs (1.1.3)
27
+ erubis (2.7.0)
28
+ every_day_irb (1.2.0)
29
+ fancy_irb (0.7.2)
30
+ paint (>= 0.8.1)
31
+ unicode-display_width (>= 0.1.1)
32
+ g (1.6.0)
33
+ ruby_gntp
34
+ hike (1.2.1)
35
+ hirb (0.6.1)
36
+ httparty (0.8.1)
37
+ multi_json
38
+ multi_xml
39
+ i18n (0.6.0)
40
+ interactive_editor (0.0.10)
41
+ spoon (>= 0.0.1)
42
+ irbtools (1.2.0)
43
+ awesome_print (~> 1.0.2)
44
+ boson (>= 0.3.4)
45
+ clipboard (~> 1.0.0)
46
+ coderay (~> 1.0.5)
47
+ every_day_irb (>= 1.2.0)
48
+ fancy_irb (>= 0.7.2)
49
+ g (>= 1.5.0)
50
+ hirb (~> 0.6.0)
51
+ interactive_editor (>= 0.0.10)
52
+ method_locator (>= 0.0.4)
53
+ method_source (>= 0.7.0)
54
+ methodfinder (>= 1.2.5)
55
+ ori (~> 0.1.0)
56
+ paint (>= 0.8.4)
57
+ sketches (>= 0.1.1)
58
+ wirb (>= 0.4.1)
59
+ zucker (>= 11)
60
+ json (1.6.5-java)
61
+ method_locator (0.0.4)
62
+ method_source (0.7.1)
63
+ methodfinder (1.2.5)
64
+ multi_json (1.1.0)
65
+ multi_xml (0.4.1)
66
+ neo4j (1.3.1-java)
67
+ activemodel (>= 3.0.0)
68
+ orm_adapter (>= 0.0.3)
69
+ railties (>= 3.0.0)
70
+ will_paginate (= 3.0.pre4)
71
+ ori (0.1.0)
72
+ orm_adapter (0.0.6)
73
+ paint (0.8.4)
74
+ rack (1.3.6)
75
+ rack-cache (1.2)
76
+ rack (>= 0.4)
77
+ rack-mount (0.8.3)
78
+ rack (>= 1.0.0)
79
+ rack-protection (1.2.0)
80
+ rack
81
+ rack-ssl (1.3.2)
82
+ rack
83
+ rack-test (0.6.1)
84
+ rack (>= 1.0)
85
+ railties (3.1.3)
86
+ actionpack (= 3.1.3)
87
+ activesupport (= 3.1.3)
88
+ rack-ssl (~> 1.3.2)
89
+ rake (>= 0.8.7)
90
+ rdoc (~> 3.4)
91
+ thor (~> 0.14.6)
92
+ rake (0.9.2.2)
93
+ rdoc (3.12)
94
+ json (~> 1.4)
95
+ rspec (2.6.0)
96
+ rspec-core (~> 2.6.0)
97
+ rspec-expectations (~> 2.6.0)
98
+ rspec-mocks (~> 2.6.0)
99
+ rspec-core (2.6.4)
100
+ rspec-expectations (2.6.0)
101
+ diff-lcs (~> 1.1.2)
102
+ rspec-mocks (2.6.0)
103
+ ruby_gntp (0.3.4)
104
+ simplecov (0.6.1)
105
+ multi_json (~> 1.0)
106
+ simplecov-html (~> 0.5.3)
107
+ simplecov-html (0.5.3)
108
+ sinatra (1.3.2)
109
+ rack (~> 1.3, >= 1.3.6)
110
+ rack-protection (~> 1.2)
111
+ tilt (~> 1.3, >= 1.3.3)
112
+ sketches (0.1.1)
113
+ spoon (0.0.1)
114
+ sprockets (2.0.3)
115
+ hike (~> 1.2)
116
+ rack (~> 1.0)
117
+ tilt (~> 1.1, != 1.3.0)
118
+ thor (0.14.6)
119
+ tilt (1.3.3)
120
+ unicode-display_width (0.1.1)
121
+ will_paginate (3.0.pre4)
122
+ wirb (0.4.1)
123
+ zucker (12.1)
124
+
125
+ PLATFORMS
126
+ java
127
+
128
+ DEPENDENCIES
129
+ activemodel (= 3.1.3)
130
+ activesupport (= 3.1.3)
131
+ httparty
132
+ irbtools
133
+ json (>= 1.6.5)
134
+ neo4j (= 1.3.1)
135
+ rake
136
+ rspec (= 2.6.0)
137
+ simplecov
138
+ sinatra
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ Copyright (c) 2012 Stefan Plantikow
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21
+
data/README.md ADDED
@@ -0,0 +1,194 @@
1
+ # neoscout
2
+
3
+ Neoscout is a tool for verifying the schema and visualizing the structure of graph databases. It is currently geared exclusively towards neo4j but could easily be extended for other graph stores.
4
+
5
+
6
+ ## Overview
7
+
8
+ neoscout walks a graph by iterating over edges and nodes (= graph elements). For each visited graph element, schema properties are verified. Depending on whether the element passes all requirements, it is considered verified or failed and counted. Required schema properties are either set programmatically or parsed from a JSON schema file as described below.
9
+
10
+ The general programmatic use of neoscout is:
11
+
12
+ ```ruby
13
+ require 'json'
14
+ require 'neoscout'
15
+
16
+ # load schema
17
+ schema_json = JSON.parse('...schema...')
18
+ # make new scout for use with embedded neo4j database
19
+ scout = ::NeoScout::GDB_Neo4j::Scout.new
20
+ # parse schema
21
+ scout.verifier.init_from_json schema_json
22
+ # iterate over edges and nodes, collecting statistics
23
+ counts = scout.new_counts
24
+ scout.count_edges counts: counts
25
+ scout.count_nodes counts: counts
26
+ # add collected statistics back to schema_json
27
+ counts.add_to_json schema_json
28
+ # print result
29
+ puts "<<RESULT\n#{schema_json.to_json}\nRESULT"
30
+ ```
31
+
32
+
33
+ ## Depends on
34
+
35
+ jruby, neo4j, sinatra, json
36
+
37
+
38
+ ## Installation
39
+
40
+ As a gem or via the typical bundle and rake build test install dance.
41
+
42
+
43
+ ## Model
44
+
45
+ neoscout assumes that the underlying graph is directed, that nodes and edges can be assigned a unique type in the
46
+ schema, and that arbitrary properties may be assigned to each node and edge.
47
+
48
+
49
+ ## JSON Schema
50
+
51
+
52
+ ### Schema Format
53
+
54
+ The currently supported schema format is
55
+
56
+ ```json
57
+ {
58
+ "nodes":{
59
+ "node_type_a": {
60
+ "properties": {
61
+ "property_a": { "relevant": false },
62
+ "property_b": { "relevant": true }
63
+ }
64
+ },
65
+ "node_type_b": {
66
+ "properties": {
67
+ "property_a": { "relevant": true },
68
+ "property_b": { "relevant": false }
69
+ }
70
+ }
71
+ },
72
+ "connections":{
73
+ "edge_type_a": {
74
+ "properties": {
75
+ "property_a": { "relevant": false },
76
+ "property_b": { "relevant": true }
77
+ },
78
+ "sources": ["node_type_a", "node_type_b"],
79
+ "targets": ["node_type_a"]
80
+ },
81
+ "edge_type_b": {
82
+ "properties": {
83
+ "property_a": { "relevant": false },
84
+ "property_b": { "relevant": true }
85
+ }
86
+ }
87
+ }
88
+ }
89
+ ```
90
+
91
+ Some properties may additionally specify a value type under `type`. Verification of value types needs to be
92
+ specified programmatically.
93
+
94
+
95
+ ### Schema output
96
+
97
+ When the validation has been completed, collected statistics may be appended to the input JSON schema. For every collected
98
+ statistic, a counter of the form `[num_failed, num_total]` is added. The list of currently collected
99
+ statistics is:
100
+
101
+ * `nodes/type/counts` number of nodes of type `type`
102
+ considered ok iff all relevant node properties are verified successfully and no additional unknown node properties
103
+ are found or iff no node properties were specified for this type
104
+ * `connections/type/counts` number of edges of type `type`
105
+ verified similar to nodes
106
+ * `nodes/type/properties/prop/counts` number of properties `prop` in nodes of type `type`
107
+ considered ok iff the property was found and its concrete value matched its value type (if given in the schema)
108
+ or the property was not found and was specified as `"relevant": false`
109
+ * `connections/type/properties/prop/counts` number of properties `prop` in edges of type `type`
110
+ verified similar to nodes
111
+ * `all/node_counts` number of nodes
112
+ considered ok iff the node was ok according to its type
113
+ * `all/connection_counts` number of edges
114
+ considered ok iff the edge was ok according to its type
115
+
116
+ Additionally, for each edge of edge type `edge_type` it is verified whether its source and destination node types
117
+ `src_type` and `dst_type` are found in `connections/type/sources` and `connections/type/targets` respectively.
118
+ Any edge type, for which these arrays are missing is considered to be verified by this test. Again, the results are
119
+ aggregated into various statistics:
120
+
121
+ * `connections/edge_type/src_stats/[ { "name": src_type, "to_dst": [ { "name": dst_type, "counts": /*count */ } ] } ]`
122
+ * `connections/edge_type/dst_stats/[ { "name": dst_type, "from_src": [ { "name": src_type, "counts": /*count */ } ] } ]`
123
+ * `nodes/src_type/src_stats/edge_type`
124
+ * `nodes/dst_type/dst_stats/edge_type`
125
+
126
+
127
+ ### Optional type testing
128
+
129
+ Additionally, schema properties may have a string-valued `type` property for testing property *values*.
130
+ To register a type test for property values, your implementation of `Typer` needs to mixin
131
+ `TyperValueTableMixin` (true for NeoScout::GDB_Neo4j::Typer). Then, just call:
132
+
133
+ ```ruby
134
+ typer.value_type_table['string'] = lambda { |n,v| v.kind_of? String }
135
+ ```
136
+
137
+ to register your type tests. Properties that have a `type` attribute, and for which a type test is
138
+ registered but whose value fails the test are considered as failed and reported accordingly.
139
+
140
+
141
+ ### Example
142
+
143
+ Please see `spec/lib/neoscout/gdb_neo4_spec.rb`, `spec/lib/neoscout/gdb_neo4_spec_schema.json`,
144
+ `spec/lib/neoscout/gdb_neo4_spec_counts.json` for an extended example.
145
+
146
+
147
+ ## Standalone runner
148
+
149
+ There is a rudimentary, sinatra-based standalone runner that attaches to a local neo4j database, and upon request
150
+ fetches a schema url and verifies the database against it. It is in `scripts/neoscout` and is installed by default.
151
+ Please consult `neoscout --help` for more details.
152
+
153
+ ### Webservice API
154
+
155
+ The standalone runner can be run as a RESTful webservice using `-w`. If this is done, it supports the
156
+ following API
157
+
158
+ * `/schema` retrieve schema
159
+ * `/verify` trigger verification
160
+ * `/shutdown` shutdown
161
+
162
+
163
+ ## Implementation Notes
164
+
165
+ This is how things work right now but expect a major change in architecture in the next version.
166
+
167
+ `Scout` is the main class that implements the generic logic for processing nodes and edges using several
168
+ helper classes
169
+
170
+ * `Typer` assigns types to nodes and edges
171
+ * `Verifier` checks all schema properties
172
+ * `Iterator` provides iteration constructs for iterating over the nodes and edges of the underlying graph
173
+ * furthermore, `Scout` features overridable factory methods for the construction of subclasses implementing
174
+ various Constraints, most importantly the node and edge property constraints. Basic implementations of those
175
+ are provided in `constraints.rb`
176
+
177
+ `Counts` is used for collecting statistics and heavily tied to logic implemented by `Scout`.
178
+
179
+ JSON schema handling is in `json_schema.rb`
180
+
181
+
182
+ ### Specializing for a new database
183
+
184
+ Please consult `gdb_neo4j.rb` to see how to do that, essentially you subclass `Scout` and potentially override default
185
+ values for the various member fields. The standalone runner currently is heavily tied to neo4j.
186
+
187
+
188
+ ### Notes on GDB_Neo4j
189
+
190
+ * Node types are currently derived from a configurable property (defaults to '_classname')
191
+ * Edge types directly correspond to the relationship type in neo4j
192
+ * Unknown nodes/edges are assigned to a reserved `__NOTYPE__` type (the actual string may be overridden, see Typer)
193
+ * You can pass configuration options for neo4j using `-C <path-to-yml>`. This is especially important for larger
194
+ databases. See etc/neo4j.yml for an example.
data/Rakefile ADDED
@@ -0,0 +1,30 @@
1
+ require 'bundler/gem_tasks'
2
+
3
+ require 'rdoc/task'
4
+ require 'rspec'
5
+ require 'rspec/core/rake_task'
6
+
7
+ desc 'Run all rspecs'
8
+ RSpec::Core::RakeTask.new(:spec) do |spec|
9
+ spec.fail_on_error = true
10
+ spec.verbose = false
11
+ # spec.rspec_opts = ['--backtrace']
12
+ end
13
+
14
+ desc 'Run rdoc over project sources'
15
+ RDoc::Task.new(:rdoc) do |rdoc|
16
+ # rdoc.main = "README.rdoc"
17
+ rdoc.rdoc_files.include("lib/**/*.rb")
18
+ end
19
+
20
+ desc 'Run irb in project environment'
21
+ task :console do
22
+ require 'irb'
23
+ ARGV.clear
24
+ IRB.conf[:USE_READLINE] = false if ENV['JRUBY_OPTS'] =~ /--ng/
25
+ IRB.start
26
+ end
27
+
28
+ task :doc => :rdoc
29
+ task :test => :spec
30
+ task :irb => :console
data/TODO.org ADDED
@@ -0,0 +1,27 @@
1
+ * Version 0.1
2
+ ** DONE Minimal documentation
3
+ ** Features
4
+ *** DONE type testing mechanism
5
+ *** DONE cardconstraints if ever: nixed, to be replaced with pacer-jogger constraints in 0.2
6
+ *** visualization
7
+ *** read-only mode for neo4j
8
+ ** Integration and (alongside) testing
9
+ *** put into sheldon
10
+ *** limit number of queries / share results(?)
11
+ *** more tests
12
+ * Version 0.2
13
+ ** Redesign internal structure
14
+ *** Brainstorming
15
+ ** Features
16
+ *** aggregate min-max-values -> recover largest edge ids
17
+ *** more precise schema input
18
+ *** pacer-jogger constraint support
19
+ *** cypher constraint support
20
+ *** query execution tool (?)
21
+ * Version 0.3
22
+ ** Cleanup standalone runner
23
+ *** DONE Refactor into class
24
+ *** Push options to sinatra
25
+ *** Better initialization (i.e. register types)
26
+ ** Support for blueprints graphs (?)
27
+ ** Parallelize iteration over graph