neoscout 0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +9 -0
- data/.rspec +3 -0
- data/.rvmrc +2 -0
- data/AUTHORS +1 -0
- data/Gemfile +21 -0
- data/Gemfile.lock +138 -0
- data/LICENSE.txt +21 -0
- data/README.md +194 -0
- data/Rakefile +30 -0
- data/TODO.org +27 -0
- data/etc/neo4j.yml +16 -0
- data/lib/neoscout.rb +12 -0
- data/lib/neoscout/constraints.rb +35 -0
- data/lib/neoscout/gdb_neo4j.rb +147 -0
- data/lib/neoscout/json_schema.rb +136 -0
- data/lib/neoscout/main.rb +205 -0
- data/lib/neoscout/model.rb +148 -0
- data/lib/neoscout/scout.rb +119 -0
- data/lib/neoscout/tools.rb +156 -0
- data/lib/neoscout/version.rb +3 -0
- data/neoscout.gemspec +25 -0
- data/root/README.md +3 -0
- data/script/neoscout +15 -0
- data/spec/lib/neoscout/constraints_spec.rb +25 -0
- data/spec/lib/neoscout/gdb_neo4j_spec.rb +81 -0
- data/spec/lib/neoscout/gdb_neo4j_spec_counts.json +282 -0
- data/spec/lib/neoscout/gdb_neo4j_spec_schema.json +46 -0
- data/spec/lib/neoscout/model_spec.rb +42 -0
- data/spec/lib/neoscout/tools_spec.rb +139 -0
- data/spec/spec_helper.rb +5 -0
- metadata +84 -0
data/.gitignore
ADDED
data/.rspec
ADDED
data/.rvmrc
ADDED
data/AUTHORS
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
Stefan Plantikow <stefanp@moviepilot.com>
|
data/Gemfile
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
source "http://rubygems.org"
|
2
|
+
|
3
|
+
gem 'sinatra'
|
4
|
+
gem 'httparty'
|
5
|
+
gem 'json', '>=1.6.5'
|
6
|
+
gem 'activemodel', '3.1.3'
|
7
|
+
gem 'activesupport', '3.1.3'
|
8
|
+
|
9
|
+
group :development do
|
10
|
+
gem 'rake'
|
11
|
+
gem 'irbtools'
|
12
|
+
end
|
13
|
+
|
14
|
+
group :test do
|
15
|
+
gem 'rspec', '2.6.0'
|
16
|
+
gem 'simplecov'
|
17
|
+
end
|
18
|
+
|
19
|
+
platforms :jruby do
|
20
|
+
gem 'neo4j', '1.3.1'
|
21
|
+
end
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,138 @@
|
|
1
|
+
GEM
|
2
|
+
remote: http://rubygems.org/
|
3
|
+
specs:
|
4
|
+
actionpack (3.1.3)
|
5
|
+
activemodel (= 3.1.3)
|
6
|
+
activesupport (= 3.1.3)
|
7
|
+
builder (~> 3.0.0)
|
8
|
+
erubis (~> 2.7.0)
|
9
|
+
i18n (~> 0.6)
|
10
|
+
rack (~> 1.3.5)
|
11
|
+
rack-cache (~> 1.1)
|
12
|
+
rack-mount (~> 0.8.2)
|
13
|
+
rack-test (~> 0.6.1)
|
14
|
+
sprockets (~> 2.0.3)
|
15
|
+
activemodel (3.1.3)
|
16
|
+
activesupport (= 3.1.3)
|
17
|
+
builder (~> 3.0.0)
|
18
|
+
i18n (~> 0.6)
|
19
|
+
activesupport (3.1.3)
|
20
|
+
multi_json (~> 1.0)
|
21
|
+
awesome_print (1.0.2)
|
22
|
+
boson (1.1.0)
|
23
|
+
builder (3.0.0)
|
24
|
+
clipboard (1.0.1)
|
25
|
+
coderay (1.0.5)
|
26
|
+
diff-lcs (1.1.3)
|
27
|
+
erubis (2.7.0)
|
28
|
+
every_day_irb (1.2.0)
|
29
|
+
fancy_irb (0.7.2)
|
30
|
+
paint (>= 0.8.1)
|
31
|
+
unicode-display_width (>= 0.1.1)
|
32
|
+
g (1.6.0)
|
33
|
+
ruby_gntp
|
34
|
+
hike (1.2.1)
|
35
|
+
hirb (0.6.1)
|
36
|
+
httparty (0.8.1)
|
37
|
+
multi_json
|
38
|
+
multi_xml
|
39
|
+
i18n (0.6.0)
|
40
|
+
interactive_editor (0.0.10)
|
41
|
+
spoon (>= 0.0.1)
|
42
|
+
irbtools (1.2.0)
|
43
|
+
awesome_print (~> 1.0.2)
|
44
|
+
boson (>= 0.3.4)
|
45
|
+
clipboard (~> 1.0.0)
|
46
|
+
coderay (~> 1.0.5)
|
47
|
+
every_day_irb (>= 1.2.0)
|
48
|
+
fancy_irb (>= 0.7.2)
|
49
|
+
g (>= 1.5.0)
|
50
|
+
hirb (~> 0.6.0)
|
51
|
+
interactive_editor (>= 0.0.10)
|
52
|
+
method_locator (>= 0.0.4)
|
53
|
+
method_source (>= 0.7.0)
|
54
|
+
methodfinder (>= 1.2.5)
|
55
|
+
ori (~> 0.1.0)
|
56
|
+
paint (>= 0.8.4)
|
57
|
+
sketches (>= 0.1.1)
|
58
|
+
wirb (>= 0.4.1)
|
59
|
+
zucker (>= 11)
|
60
|
+
json (1.6.5-java)
|
61
|
+
method_locator (0.0.4)
|
62
|
+
method_source (0.7.1)
|
63
|
+
methodfinder (1.2.5)
|
64
|
+
multi_json (1.1.0)
|
65
|
+
multi_xml (0.4.1)
|
66
|
+
neo4j (1.3.1-java)
|
67
|
+
activemodel (>= 3.0.0)
|
68
|
+
orm_adapter (>= 0.0.3)
|
69
|
+
railties (>= 3.0.0)
|
70
|
+
will_paginate (= 3.0.pre4)
|
71
|
+
ori (0.1.0)
|
72
|
+
orm_adapter (0.0.6)
|
73
|
+
paint (0.8.4)
|
74
|
+
rack (1.3.6)
|
75
|
+
rack-cache (1.2)
|
76
|
+
rack (>= 0.4)
|
77
|
+
rack-mount (0.8.3)
|
78
|
+
rack (>= 1.0.0)
|
79
|
+
rack-protection (1.2.0)
|
80
|
+
rack
|
81
|
+
rack-ssl (1.3.2)
|
82
|
+
rack
|
83
|
+
rack-test (0.6.1)
|
84
|
+
rack (>= 1.0)
|
85
|
+
railties (3.1.3)
|
86
|
+
actionpack (= 3.1.3)
|
87
|
+
activesupport (= 3.1.3)
|
88
|
+
rack-ssl (~> 1.3.2)
|
89
|
+
rake (>= 0.8.7)
|
90
|
+
rdoc (~> 3.4)
|
91
|
+
thor (~> 0.14.6)
|
92
|
+
rake (0.9.2.2)
|
93
|
+
rdoc (3.12)
|
94
|
+
json (~> 1.4)
|
95
|
+
rspec (2.6.0)
|
96
|
+
rspec-core (~> 2.6.0)
|
97
|
+
rspec-expectations (~> 2.6.0)
|
98
|
+
rspec-mocks (~> 2.6.0)
|
99
|
+
rspec-core (2.6.4)
|
100
|
+
rspec-expectations (2.6.0)
|
101
|
+
diff-lcs (~> 1.1.2)
|
102
|
+
rspec-mocks (2.6.0)
|
103
|
+
ruby_gntp (0.3.4)
|
104
|
+
simplecov (0.6.1)
|
105
|
+
multi_json (~> 1.0)
|
106
|
+
simplecov-html (~> 0.5.3)
|
107
|
+
simplecov-html (0.5.3)
|
108
|
+
sinatra (1.3.2)
|
109
|
+
rack (~> 1.3, >= 1.3.6)
|
110
|
+
rack-protection (~> 1.2)
|
111
|
+
tilt (~> 1.3, >= 1.3.3)
|
112
|
+
sketches (0.1.1)
|
113
|
+
spoon (0.0.1)
|
114
|
+
sprockets (2.0.3)
|
115
|
+
hike (~> 1.2)
|
116
|
+
rack (~> 1.0)
|
117
|
+
tilt (~> 1.1, != 1.3.0)
|
118
|
+
thor (0.14.6)
|
119
|
+
tilt (1.3.3)
|
120
|
+
unicode-display_width (0.1.1)
|
121
|
+
will_paginate (3.0.pre4)
|
122
|
+
wirb (0.4.1)
|
123
|
+
zucker (12.1)
|
124
|
+
|
125
|
+
PLATFORMS
|
126
|
+
java
|
127
|
+
|
128
|
+
DEPENDENCIES
|
129
|
+
activemodel (= 3.1.3)
|
130
|
+
activesupport (= 3.1.3)
|
131
|
+
httparty
|
132
|
+
irbtools
|
133
|
+
json (>= 1.6.5)
|
134
|
+
neo4j (= 1.3.1)
|
135
|
+
rake
|
136
|
+
rspec (= 2.6.0)
|
137
|
+
simplecov
|
138
|
+
sinatra
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
Copyright (c) 2012 Stefan Plantikow
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
21
|
+
|
data/README.md
ADDED
@@ -0,0 +1,194 @@
|
|
1
|
+
# neoscout
|
2
|
+
|
3
|
+
Neoscout is a tool for verifying the schema and visualizing the structure of graph databases. It is currently geared exclusively towards neo4j but could easily be extended for other graph stores.
|
4
|
+
|
5
|
+
|
6
|
+
## Overview
|
7
|
+
|
8
|
+
neoscout walks a graph by iterating over edges and nodes (= graph elements). For each visited graph element, schema properties are verified. Depending on whether the element passes all requirements, it is considered verified or failed and counted. Required schema properties are either set programmatically or parsed from a JSON schema file as described below.
|
9
|
+
|
10
|
+
The general programmatic use of neoscout is:
|
11
|
+
|
12
|
+
```ruby
|
13
|
+
require 'json'
|
14
|
+
require 'neoscout'
|
15
|
+
|
16
|
+
# load schema
|
17
|
+
schema_json = JSON.parse('...schema...')
|
18
|
+
# make new scout for use with embedded neo4j database
|
19
|
+
scout = ::NeoScout::GDB_Neo4j::Scout.new
|
20
|
+
# parse schema
|
21
|
+
scout.verifier.init_from_json schema_json
|
22
|
+
# iterate over edges and nodes, collecting statistics
|
23
|
+
counts = scout.new_counts
|
24
|
+
scout.count_edges counts: counts
|
25
|
+
scout.count_nodes counts: counts
|
26
|
+
# add collected statistics back to schema_json
|
27
|
+
counts.add_to_json schema_json
|
28
|
+
# print result
|
29
|
+
puts "<<RESULT\n#{schema_json.to_json}\nRESULT"
|
30
|
+
```
|
31
|
+
|
32
|
+
|
33
|
+
## Depends on
|
34
|
+
|
35
|
+
jruby, neo4j, sinatra, json
|
36
|
+
|
37
|
+
|
38
|
+
## Installation
|
39
|
+
|
40
|
+
As a gem or via the typical bundle and rake build test install dance.
|
41
|
+
|
42
|
+
|
43
|
+
## Model
|
44
|
+
|
45
|
+
neoscout assumes that the underlying graph is directed, that nodes and edges can be assigned a unique type in the
|
46
|
+
schema, and that arbitrary properties may be assigned to each node and edge.
|
47
|
+
|
48
|
+
|
49
|
+
## JSON Schema
|
50
|
+
|
51
|
+
|
52
|
+
### Schema Format
|
53
|
+
|
54
|
+
The currently supported schema format is
|
55
|
+
|
56
|
+
```json
|
57
|
+
{
|
58
|
+
"nodes":{
|
59
|
+
"node_type_a": {
|
60
|
+
"properties": {
|
61
|
+
"property_a": { "relevant": false },
|
62
|
+
"property_b": { "relevant": true }
|
63
|
+
}
|
64
|
+
},
|
65
|
+
"node_type_b": {
|
66
|
+
"properties": {
|
67
|
+
"property_a": { "relevant": true },
|
68
|
+
"property_b": { "relevant": false }
|
69
|
+
}
|
70
|
+
}
|
71
|
+
},
|
72
|
+
"connections":{
|
73
|
+
"edge_type_a": {
|
74
|
+
"properties": {
|
75
|
+
"property_a": { "relevant": false },
|
76
|
+
"property_b": { "relevant": true }
|
77
|
+
},
|
78
|
+
"sources": ["node_type_a", "node_type_b"],
|
79
|
+
"targets": ["node_type_a"]
|
80
|
+
},
|
81
|
+
"edge_type_b": {
|
82
|
+
"properties": {
|
83
|
+
"property_a": { "relevant": false },
|
84
|
+
"property_b": { "relevant": true }
|
85
|
+
}
|
86
|
+
}
|
87
|
+
}
|
88
|
+
}
|
89
|
+
```
|
90
|
+
|
91
|
+
Some properties may additionally specify a value type under `type`. Verification of value types needs to be
|
92
|
+
specified programmatically.
|
93
|
+
|
94
|
+
|
95
|
+
### Schema output
|
96
|
+
|
97
|
+
When the validation has been completed, collected statistics may be appended to the input JSON schema. For every collected
|
98
|
+
statistic, a counter of the form `[num_failed, num_total]` is added. The list of currently collected
|
99
|
+
statistics is:
|
100
|
+
|
101
|
+
* `nodes/type/counts` number of nodes of type `type`
|
102
|
+
considered ok iff all relevant node properties are verified successfully and no additional unknown node properties
|
103
|
+
are found or iff no node properties were specified for this type
|
104
|
+
* `connections/type/counts` number of edges of type `type`
|
105
|
+
verified similar to nodes
|
106
|
+
* `nodes/type/properties/prop/counts` number of properties `prop` in nodes of type `type`
|
107
|
+
considered ok iff the property was found and its concrete value matched its value type (if given in the schema)
|
108
|
+
or the property was not found and was specified as `"relevant": false`
|
109
|
+
* `connections/type/properties/prop/counts` number of properties `prop` in edges of type `type`
|
110
|
+
verified similar to nodes
|
111
|
+
* `all/node_counts` number of nodes
|
112
|
+
considered ok iff the node was ok according to its type
|
113
|
+
* `all/connection_counts` number of edges
|
114
|
+
considered ok iff the edge was ok according to its type
|
115
|
+
|
116
|
+
Additionally, for each edge of edge type `edge_type` it is verified whether its source and destination node types
|
117
|
+
`src_type` and `dst_type` are found in `connections/type/sources` and `connections/type/targets` respectively.
|
118
|
+
Any edge type for which these arrays are missing is considered to be verified by this test. Again, the results are
|
119
|
+
aggregated into various statistics:
|
120
|
+
|
121
|
+
* `connections/edge_type/src_stats/[ { "name": src_type, "to_dst": [ { "name": dst_type, "counts": /*count */ } ] } ]`
|
122
|
+
* `connections/edge_type/dst_stats/[ { "name": dst_type, "from_src": [ { "name": src_type, "counts": /*count */ } ] } ]`
|
123
|
+
* `nodes/src_type/src_stats/edge_type`
|
124
|
+
* `nodes/dst_type/dst_stats/edge_type`
|
125
|
+
|
126
|
+
|
127
|
+
### Optional type testing
|
128
|
+
|
129
|
+
Additionally, schema properties may have a string-valued `type` property for testing property *values*.
|
130
|
+
To register a type test for property values, your implementation of `Typer` needs to mixin
|
131
|
+
`TyperValueTableMixin` (true for NeoScout::GDB_Neo4j::Typer). Then, just call:
|
132
|
+
|
133
|
+
```ruby
|
134
|
+
typer.value_type_table['string'] = lambda { |n,v| v.kind_of? String }
|
135
|
+
```
|
136
|
+
|
137
|
+
to register your type tests. Properties that have a `type` attribute, and for which a type test is
|
138
|
+
registered but whose value fails the test are considered as failed and reported accordingly.
|
139
|
+
|
140
|
+
|
141
|
+
### Example
|
142
|
+
|
143
|
+
Please see `spec/lib/neoscout/gdb_neo4j_spec.rb`, `spec/lib/neoscout/gdb_neo4j_spec_schema.json`,
|
144
|
+
`spec/lib/neoscout/gdb_neo4j_spec_counts.json` for an extended example.
|
145
|
+
|
146
|
+
|
147
|
+
## Standalone runner
|
148
|
+
|
149
|
+
There is a rudimentary, sinatra-based standalone runner that attaches to a local neo4j database, and upon request
|
150
|
+
fetches a schema url and verifies the database against it. It is in `script/neoscout` and is installed by default.
|
151
|
+
Please consult `neoscout --help` for more details.
|
152
|
+
|
153
|
+
### Webservice API
|
154
|
+
|
155
|
+
The standalone runner can be run as a RESTful webservice using `-w`. If this is done, it supports the
|
156
|
+
following API:
|
157
|
+
|
158
|
+
* `/schema` retrieve schema
|
159
|
+
* `/verify` trigger verification
|
160
|
+
* `/shutdown` shutdown
|
161
|
+
|
162
|
+
|
163
|
+
## Implementation Notes
|
164
|
+
|
165
|
+
This is how things work right now but expect a major change in architecture in the next version.
|
166
|
+
|
167
|
+
`Scout` is the main class that implements the generic logic for processing nodes and edges using several
|
168
|
+
helper classes
|
169
|
+
|
170
|
+
* `Typer` assigns types to nodes and edges
|
171
|
+
* `Verifier` checks all schema properties
|
172
|
+
* `Iterator` provides iteration constructs for iterating over the nodes and edges of the underlying graph
|
173
|
+
* furthermore, `Scout` features overridable factory methods for the construction of subclasses implementing
|
174
|
+
various Constraints, most importantly the node and edge property constraints. Basic implementations of those
|
175
|
+
are provided in `constraints.rb`
|
176
|
+
|
177
|
+
`Counts` is used for collecting statistics and heavily tied to logic implemented by `Scout`.
|
178
|
+
|
179
|
+
JSON schema handling is in `json_schema.rb`
|
180
|
+
|
181
|
+
|
182
|
+
### Specializing for a new database
|
183
|
+
|
184
|
+
Please consult `gdb_neo4j.rb` to see how to do that, essentially you subclass `Scout` and potentially override default
|
185
|
+
values for the various member fields. The standalone runner currently is heavily tied to neo4j.
|
186
|
+
|
187
|
+
|
188
|
+
### Notes on GDB_Neo4j
|
189
|
+
|
190
|
+
* Node types are currently derived from a configurable property (defaults to '_classname')
|
191
|
+
* Edge types directly correspond to the relationship type in neo4j
|
192
|
+
* Unknown nodes/edges are assigned to a reserved `__NOTYPE__` type (the actual string may be overridden, see Typer)
|
193
|
+
* You can pass configuration options for neo4j using `-C <path-to-yml>`. This is especially important for larger
|
194
|
+
databases. See etc/neo4j.yml for an example.
|
data/Rakefile
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'bundler/gem_tasks'
|
2
|
+
|
3
|
+
require 'rdoc/task'
|
4
|
+
require 'rspec'
|
5
|
+
require 'rspec/core/rake_task'
|
6
|
+
|
7
|
+
desc 'Run all rspecs'
|
8
|
+
RSpec::Core::RakeTask.new(:spec) do |spec|
|
9
|
+
spec.fail_on_error = true
|
10
|
+
spec.verbose = false
|
11
|
+
# spec.rspec_opts = ['--backtrace']
|
12
|
+
end
|
13
|
+
|
14
|
+
desc 'Run rdoc over project sources'
|
15
|
+
RDoc::Task.new(:rdoc) do |rdoc|
|
16
|
+
# rdoc.main = "README.rdoc"
|
17
|
+
rdoc.rdoc_files.include("lib/**/*.rb")
|
18
|
+
end
|
19
|
+
|
20
|
+
desc 'Run irb in project environment'
|
21
|
+
task :console do
|
22
|
+
require 'irb'
|
23
|
+
ARGV.clear
|
24
|
+
IRB.conf[:USE_READLINE] = false if ENV['JRUBY_OPTS'] =~ /--ng/
|
25
|
+
IRB.start
|
26
|
+
end
|
27
|
+
|
28
|
+
task :doc => :rdoc
|
29
|
+
task :test => :spec
|
30
|
+
task :irb => :console
|
data/TODO.org
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
* Version 0.1
|
2
|
+
** DONE Minimal documentation
|
3
|
+
** Features
|
4
|
+
*** DONE type testing mechanism
|
5
|
+
*** DONE cardconstraints if ever: nixed, to be replaced with pacer-jogger constraints in 0.2
|
6
|
+
*** visualization
|
7
|
+
*** read-only mode for neo4j
|
8
|
+
** Integration and (alongside) testing
|
9
|
+
*** put into sheldon
|
10
|
+
*** limit number of queries / share results(?)
|
11
|
+
*** more tests
|
12
|
+
* Version 0.2
|
13
|
+
** Redesign internal structure
|
14
|
+
*** Brainstorming
|
15
|
+
** Features
|
16
|
+
*** aggregate min-max-values -> recover largest edge ids
|
17
|
+
*** more precise schema input
|
18
|
+
*** pacer-jogger constraint support
|
19
|
+
*** cypher constraint support
|
20
|
+
*** query execution tool (?)
|
21
|
+
* Version 0.3
|
22
|
+
** Cleanup standalone runner
|
23
|
+
*** DONE Refactor into class
|
24
|
+
*** Push options to sinatra
|
25
|
+
*** Better initialization (i.e. register types)
|
26
|
+
** Support for blueprints graphs (?)
|
27
|
+
** Parallelize iteration over graph
|