cmap 0.4.0 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/cmap.gemspec +1 -1
- data/lib/cmap.rb +1 -1
- data/lib/cmap/edges_to_queries.rb +3 -3
- data/lib/cmap/propositions_to_graph.rb +16 -3
- data/lib/cmap/propositions_to_sql.rb +1 -5
- data/lib/cmap/sanitize_graph.rb +42 -0
- data/lib/cmap/subquery_expander.rb +8 -3
- data/lib/cmap/version.rb +1 -1
- metadata +5 -5
- data/lib/cmap/graph_sanitizer.rb +0 -44
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 260c10743062b043941c9cb50d306107fe4a7d0a
|
4
|
+
data.tar.gz: e51eb88311d3af6e68564dd1e99c01fe941fb80b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1da1e5e4b8f95d7c64dcb9cec10c2e054a851027074a197d6fedaec33f41e05b2bf4cf5d03a34d2ca1de6050c4031b6e1a7a95e969dbb095010a33e2aa0259e1
|
7
|
+
data.tar.gz: a697dcc4d94b53ee96ee893c266316c3cf6f695e8fd173f7e02066584fed26519b6e53024897eb4a4edfee19d1893744f14fddffcc68108089bbe03025584c9c
|
data/cmap.gemspec
CHANGED
data/lib/cmap.rb
CHANGED
@@ -6,7 +6,7 @@ require 'pry'
|
|
6
6
|
|
7
7
|
require 'directed_graph'
|
8
8
|
|
9
|
-
require_relative "./cmap/graph_sanitizer.rb"
|
9
|
+
require_relative "./cmap/sanitize_graph.rb"
|
10
10
|
require_relative "./cmap/subquery_expander.rb"
|
11
11
|
require_relative "./cmap/edges_to_queries.rb"
|
12
12
|
require_relative "./cmap/propositions_to_graph.rb"
|
@@ -19,11 +19,11 @@ module Cmap; class EdgesToQueries
|
|
19
19
|
private
|
20
20
|
|
21
21
|
def unique_edges
|
22
|
-
edges.uniq {|e| [e.destination_vertex, e.value]}
|
22
|
+
edges.uniq {|e| [e.destination_vertex.data[:sanitized_name], e.data[:sanitized_value]]}
|
23
23
|
end
|
24
24
|
|
25
25
|
def add_columns_queries
|
26
|
-
unique_edges.map {|e| "alter table #{schema_name}.#{table_name} add column #{e.destination_vertex} int2;"}
|
26
|
+
unique_edges.map {|e| "alter table #{schema_name}.#{table_name} add column #{e.destination_vertex.data[:sanitized_name]} int2;"}
|
27
27
|
end
|
28
28
|
|
29
29
|
def grouped_edges
|
@@ -35,7 +35,7 @@ module Cmap; class EdgesToQueries
|
|
35
35
|
end
|
36
36
|
|
37
37
|
def updates
|
38
|
-
u = (grouped_edges[false] || []).map {|e| "#{e.destination_vertex}=(#{e.value})::int"}.join(", ")
|
38
|
+
u = (grouped_edges[false] || []).map {|e| "#{e.destination_vertex.data[:sanitized_name]}=(#{e.data[:sanitized_value]})::int"}.join(", ")
|
39
39
|
return [] if u.empty?
|
40
40
|
["update #{schema_name}.#{table_name} set #{u};"]
|
41
41
|
end
|
@@ -4,22 +4,35 @@ module Cmap; class PropositionsToGraph
|
|
4
4
|
|
5
5
|
def initialize(propositions_path)
|
6
6
|
@propositions_path = propositions_path
|
7
|
+
@vertices = []
|
7
8
|
end
|
8
9
|
|
9
10
|
def graph
|
10
|
-
DirectedGraph::Graph.new(edges)
|
11
|
+
SanitizeGraph.new(DirectedGraph::Graph.new(edges)).sanitize
|
11
12
|
end
|
12
13
|
|
13
14
|
private
|
14
15
|
|
15
16
|
def edges
|
16
17
|
@edges ||= propositions.inject([]) do |memo, e|
|
17
|
-
|
18
|
-
|
18
|
+
origin_vertex_name, value, destination_vertex_name = e
|
19
|
+
origin_vertex = find_vertex_or_create(origin_vertex_name)
|
20
|
+
destination_vertex = find_vertex_or_create(destination_vertex_name)
|
21
|
+
data = { :value => value }
|
22
|
+
memo << DirectedGraph::Edge.new(origin_vertex: origin_vertex, destination_vertex: destination_vertex, data: data)
|
19
23
|
memo
|
20
24
|
end
|
21
25
|
end
|
22
26
|
|
27
|
+
# Looks up the vertex registered under +vertex_name+ in @vertices.
# When none exists yet, builds a DirectedGraph::Vertex whose name and
# data[:name] are both +vertex_name+, records it in @vertices and returns it.
def find_vertex_or_create(vertex_name)
  @vertices.find { |known| known.name == vertex_name } ||
    DirectedGraph::Vertex.new(name: vertex_name, data: { :name => vertex_name }).tap { |fresh| @vertices << fresh }
end
|
35
|
+
|
23
36
|
def propositions
|
24
37
|
csv_path = File.expand_path(propositions_path, File.dirname(__FILE__))
|
25
38
|
CSV.read(csv_path, { :col_sep => "\t", :quote_char => '"' })
|
@@ -21,12 +21,8 @@ module Cmap; class PropositionsToSql
|
|
21
21
|
propositions_to_graph.graph
|
22
22
|
end
|
23
23
|
|
24
|
-
def sanitized_graph
|
25
|
-
GraphSanitizer.new(raw_graph).sanitized_graph
|
26
|
-
end
|
27
|
-
|
28
24
|
def graph_to_sql
|
29
|
-
GraphToSql.new(table_name, schema_name, sanitized_graph, subquery_gsubs)
|
25
|
+
GraphToSql.new(table_name, schema_name, raw_graph, subquery_gsubs)
|
30
26
|
end
|
31
27
|
|
32
28
|
end; end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
module Cmap
  # Annotates a graph, in place, with SQL-friendly identifiers.
  #
  # * every vertex gets data[:sanitized_name], derived from data[:name];
  # * every edge gets data[:sanitized_value], a copy of data[:value] in which
  #   each vertex's original name is replaced by its sanitized name.
  #
  # The original data[:name] / data[:value] entries are left untouched.
  class SanitizeGraph
    # graph - object responding to #vertices and #edges; each vertex and edge
    #         must expose a mutable #data hash.
    def initialize(graph)
      @graph = graph
    end

    # Adds the sanitized entries to all vertices and edges.
    #
    # Returns the same graph object that was passed to the constructor.
    def sanitize
      # Vertices first: edge sanitizing reads data[:sanitized_name].
      sanitize_vertices
      sanitize_edges
      @graph
    end

    private

    def sanitize_vertices
      @graph.vertices.each { |v| v.data[:sanitized_name] = sanitize_string(v.data[:name]) }
    end

    # Vertices ordered from longest to shortest name, so that a name which is
    # a substring of a longer name is substituted after the longer one.
    #
    # HACK: this sorting makes the assumption vertices lower in the graph have longer ids
    def ordered_vertices
      @ordered_vertices ||= @graph.vertices.sort_by { |v| -v.data[:name].length }
    end

    def sanitize_edges
      @graph.edges.each { |edge| sanitize_edge(edge) }
    end

    # Threads the edge value through every vertex substitution. Each gsub
    # works on the result of the previous one; restarting from data[:value]
    # on every pass would keep only the last vertex's replacement.
    def sanitize_edge(edge)
      edge.data[:sanitized_value] = ordered_vertices.inject(edge.data[:value]) do |value, v|
        value.gsub(v.data[:name], v.data[:sanitized_name])
      end
    end

    # Collapses each run of non-alphanumeric characters into a single
    # underscore and lowercases the result.
    def sanitize_string(string)
      string.gsub(/[^0-9a-zA-Z]+/, '_').downcase
    end
  end
end
|
@@ -9,12 +9,17 @@ module Cmap; class SubqueryExpander
|
|
9
9
|
end
|
10
10
|
|
11
11
|
def update_query?(edge)
|
12
|
-
edge.value != query(edge)
|
12
|
+
edge.data[:sanitized_value] != query(edge)
|
13
13
|
end
|
14
14
|
|
15
15
|
def query(edge)
|
16
|
-
r = edge.value
|
17
|
-
replacements = [
|
16
|
+
r = edge.data[:sanitized_value]
|
17
|
+
replacements = [
|
18
|
+
["+table_name+", table_name],
|
19
|
+
["+schema_name+", schema_name],
|
20
|
+
["+destination_vertex+", edge.destination_vertex.data[:sanitized_name]],
|
21
|
+
["+origin_vertex+", edge.origin_vertex.data[:sanitized_name]]
|
22
|
+
]
|
18
23
|
(subquery_gsubs + replacements).each {|gsub| r = r.gsub(*gsub)}
|
19
24
|
r
|
20
25
|
end
|
data/lib/cmap/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cmap
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.0
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- MrPowers
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-01-
|
11
|
+
date: 2016-01-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -86,14 +86,14 @@ dependencies:
|
|
86
86
|
requirements:
|
87
87
|
- - '='
|
88
88
|
- !ruby/object:Gem::Version
|
89
|
-
version: 0.
|
89
|
+
version: 0.6.0
|
90
90
|
type: :runtime
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
94
|
- - '='
|
95
95
|
- !ruby/object:Gem::Version
|
96
|
-
version: 0.
|
96
|
+
version: 0.6.0
|
97
97
|
description: Converts cmap exports that follow strict conventions into postgreSQL
|
98
98
|
for data analysis
|
99
99
|
email:
|
@@ -116,10 +116,10 @@ files:
|
|
116
116
|
- example/cmap_to_sql.rb
|
117
117
|
- lib/cmap.rb
|
118
118
|
- lib/cmap/edges_to_queries.rb
|
119
|
-
- lib/cmap/graph_sanitizer.rb
|
120
119
|
- lib/cmap/graph_to_sql.rb
|
121
120
|
- lib/cmap/propositions_to_graph.rb
|
122
121
|
- lib/cmap/propositions_to_sql.rb
|
122
|
+
- lib/cmap/sanitize_graph.rb
|
123
123
|
- lib/cmap/sql_runner.rb
|
124
124
|
- lib/cmap/subquery_expander.rb
|
125
125
|
- lib/cmap/version.rb
|
data/lib/cmap/graph_sanitizer.rb
DELETED
@@ -1,44 +0,0 @@
|
|
1
|
-
module Cmap; class GraphSanitizer
|
2
|
-
|
3
|
-
attr_reader :graph
|
4
|
-
|
5
|
-
def initialize(graph)
|
6
|
-
@graph = graph
|
7
|
-
@sanitized_vertex_lookup = {}
|
8
|
-
end
|
9
|
-
|
10
|
-
def sanitized_graph
|
11
|
-
DirectedGraph::Graph.new(sanitized_edges)
|
12
|
-
end
|
13
|
-
|
14
|
-
def edges_with_sanitized_vertices
|
15
|
-
graph.edges.map do |edge|
|
16
|
-
origin_vertex = sanitize_string(edge.origin_vertex)
|
17
|
-
destination_vertex = sanitize_string(edge.destination_vertex)
|
18
|
-
|
19
|
-
@sanitized_vertex_lookup[edge.origin_vertex] = origin_vertex
|
20
|
-
@sanitized_vertex_lookup[edge.destination_vertex] = destination_vertex
|
21
|
-
|
22
|
-
{origin_vertex: origin_vertex, destination_vertex: destination_vertex, value: edge.value}
|
23
|
-
end
|
24
|
-
end
|
25
|
-
|
26
|
-
def sanitize_edge_value(edge_args)
|
27
|
-
@sanitized_vertex_lookup.keys.sort.reverse.each do |original_vertex_name|
|
28
|
-
edge_args[:value].gsub!(original_vertex_name, @sanitized_vertex_lookup[original_vertex_name])
|
29
|
-
end
|
30
|
-
edge_args
|
31
|
-
end
|
32
|
-
|
33
|
-
def sanitized_edges
|
34
|
-
edges_with_sanitized_vertices.map do |edge_args|
|
35
|
-
DirectedGraph::Edge.new(sanitize_edge_value(edge_args))
|
36
|
-
end
|
37
|
-
end
|
38
|
-
|
39
|
-
def sanitize_string(string)
|
40
|
-
string.gsub(/[^0-9a-zA-Z]+/, '_').downcase
|
41
|
-
end
|
42
|
-
|
43
|
-
end; end
|
44
|
-
|