cmap 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/cmap.gemspec +1 -1
- data/lib/cmap.rb +1 -1
- data/lib/cmap/edges_to_queries.rb +3 -3
- data/lib/cmap/propositions_to_graph.rb +16 -3
- data/lib/cmap/propositions_to_sql.rb +1 -5
- data/lib/cmap/sanitize_graph.rb +42 -0
- data/lib/cmap/subquery_expander.rb +8 -3
- data/lib/cmap/version.rb +1 -1
- metadata +5 -5
- data/lib/cmap/graph_sanitizer.rb +0 -44
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 260c10743062b043941c9cb50d306107fe4a7d0a
|
4
|
+
data.tar.gz: e51eb88311d3af6e68564dd1e99c01fe941fb80b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1da1e5e4b8f95d7c64dcb9cec10c2e054a851027074a197d6fedaec33f41e05b2bf4cf5d03a34d2ca1de6050c4031b6e1a7a95e969dbb095010a33e2aa0259e1
|
7
|
+
data.tar.gz: a697dcc4d94b53ee96ee893c266316c3cf6f695e8fd173f7e02066584fed26519b6e53024897eb4a4edfee19d1893744f14fddffcc68108089bbe03025584c9c
|
data/cmap.gemspec
CHANGED
data/lib/cmap.rb
CHANGED
@@ -6,7 +6,7 @@ require 'pry'
|
|
6
6
|
|
7
7
|
require 'directed_graph'
|
8
8
|
|
9
|
-
require_relative "./cmap/graph_sanitizer.rb"
|
9
|
+
require_relative "./cmap/sanitize_graph.rb"
|
10
10
|
require_relative "./cmap/subquery_expander.rb"
|
11
11
|
require_relative "./cmap/edges_to_queries.rb"
|
12
12
|
require_relative "./cmap/propositions_to_graph.rb"
|
@@ -19,11 +19,11 @@ module Cmap; class EdgesToQueries
|
|
19
19
|
private
|
20
20
|
|
21
21
|
def unique_edges
|
22
|
-
edges.uniq {|e| [e.destination_vertex, e.
|
22
|
+
edges.uniq {|e| [e.destination_vertex.data[:sanitized_name], e.data[:sanitized_value]]}
|
23
23
|
end
|
24
24
|
|
25
25
|
def add_columns_queries
|
26
|
-
unique_edges.map {|e| "alter table #{schema_name}.#{table_name} add column #{e.destination_vertex} int2;"}
|
26
|
+
unique_edges.map {|e| "alter table #{schema_name}.#{table_name} add column #{e.destination_vertex.data[:sanitized_name]} int2;"}
|
27
27
|
end
|
28
28
|
|
29
29
|
def grouped_edges
|
@@ -35,7 +35,7 @@ module Cmap; class EdgesToQueries
|
|
35
35
|
end
|
36
36
|
|
37
37
|
def updates
|
38
|
-
u = (grouped_edges[false] || []).map {|e| "#{e.destination_vertex}=(#{e.
|
38
|
+
u = (grouped_edges[false] || []).map {|e| "#{e.destination_vertex.data[:sanitized_name]}=(#{e.data[:sanitized_value]})::int"}.join(", ")
|
39
39
|
return [] if u.empty?
|
40
40
|
["update #{schema_name}.#{table_name} set #{u};"]
|
41
41
|
end
|
@@ -4,22 +4,35 @@ module Cmap; class PropositionsToGraph
|
|
4
4
|
|
5
5
|
def initialize(propositions_path)
|
6
6
|
@propositions_path = propositions_path
|
7
|
+
@vertices = []
|
7
8
|
end
|
8
9
|
|
9
10
|
def graph
|
10
|
-
DirectedGraph::Graph.new(edges)
|
11
|
+
SanitizeGraph.new(DirectedGraph::Graph.new(edges)).sanitize
|
11
12
|
end
|
12
13
|
|
13
14
|
private
|
14
15
|
|
15
16
|
def edges
|
16
17
|
@edges ||= propositions.inject([]) do |memo, e|
|
17
|
-
|
18
|
-
|
18
|
+
origin_vertex_name, value, destination_vertex_name = e
|
19
|
+
origin_vertex = find_vertex_or_create(origin_vertex_name)
|
20
|
+
destination_vertex = find_vertex_or_create(destination_vertex_name)
|
21
|
+
data = { :value => value }
|
22
|
+
memo << DirectedGraph::Edge.new(origin_vertex: origin_vertex, destination_vertex: destination_vertex, data: data)
|
19
23
|
memo
|
20
24
|
end
|
21
25
|
end
|
22
26
|
|
27
|
+
def find_vertex_or_create(vertex_name)
|
28
|
+
vertex = @vertices.find {|v| v.name == vertex_name}
|
29
|
+
return vertex if vertex
|
30
|
+
data = { :name => vertex_name }
|
31
|
+
v = DirectedGraph::Vertex.new(name: vertex_name, data: data)
|
32
|
+
@vertices << v
|
33
|
+
v
|
34
|
+
end
|
35
|
+
|
23
36
|
def propositions
|
24
37
|
csv_path = File.expand_path(propositions_path, File.dirname(__FILE__))
|
25
38
|
CSV.read(csv_path, { :col_sep => "\t", :quote_char => '"' })
|
@@ -21,12 +21,8 @@ module Cmap; class PropositionsToSql
|
|
21
21
|
propositions_to_graph.graph
|
22
22
|
end
|
23
23
|
|
24
|
-
def sanitized_graph
|
25
|
-
GraphSanitizer.new(raw_graph).sanitized_graph
|
26
|
-
end
|
27
|
-
|
28
24
|
def graph_to_sql
|
29
|
-
GraphToSql.new(table_name, schema_name,
|
25
|
+
GraphToSql.new(table_name, schema_name, raw_graph, subquery_gsubs)
|
30
26
|
end
|
31
27
|
|
32
28
|
end; end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
module Cmap; class SanitizeGraph
|
2
|
+
|
3
|
+
def initialize(graph)
|
4
|
+
@graph = graph
|
5
|
+
end
|
6
|
+
|
7
|
+
def sanitize
|
8
|
+
sanitize_vertices
|
9
|
+
sanitize_edges
|
10
|
+
@graph
|
11
|
+
end
|
12
|
+
|
13
|
+
private
|
14
|
+
|
15
|
+
def sanitize_vertices
|
16
|
+
@graph.vertices.each { |v| v.data[:sanitized_name] = sanitize_string(v.data[:name]) }
|
17
|
+
end
|
18
|
+
|
19
|
+
def ordered_vertices
|
20
|
+
# HACK: this sorting makes the assumption vertices lower in the graph have longer ids
|
21
|
+
@ordered_vertices ||= @graph.vertices.sort do |a, b|
|
22
|
+
b.name.length <=> a.name.length
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
def sanitize_edges
|
27
|
+
@graph.edges.each { |edge| sanitize_edge(edge) }
|
28
|
+
end
|
29
|
+
|
30
|
+
def sanitize_edge(edge)
|
31
|
+
ordered_vertices.each do |v|
|
32
|
+
edge.data[:sanitized_value] = edge.data[:value].gsub(v.data[:name], v.data[:sanitized_name])
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
private
|
37
|
+
|
38
|
+
def sanitize_string(string)
|
39
|
+
string.gsub(/[^0-9a-zA-Z]+/, '_').downcase
|
40
|
+
end
|
41
|
+
|
42
|
+
end; end
|
@@ -9,12 +9,17 @@ module Cmap; class SubqueryExpander
|
|
9
9
|
end
|
10
10
|
|
11
11
|
def update_query?(edge)
|
12
|
-
edge.
|
12
|
+
edge.data[:sanitized_value] != query(edge)
|
13
13
|
end
|
14
14
|
|
15
15
|
def query(edge)
|
16
|
-
r = edge.
|
17
|
-
replacements = [
|
16
|
+
r = edge.data[:sanitized_value]
|
17
|
+
replacements = [
|
18
|
+
["+table_name+", table_name],
|
19
|
+
["+schema_name+", schema_name],
|
20
|
+
["+destination_vertex+", edge.destination_vertex.data[:sanitized_name]],
|
21
|
+
["+origin_vertex+", edge.origin_vertex.data[:sanitized_name]]
|
22
|
+
]
|
18
23
|
(subquery_gsubs + replacements).each {|gsub| r = r.gsub(*gsub)}
|
19
24
|
r
|
20
25
|
end
|
data/lib/cmap/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cmap
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.0
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- MrPowers
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-01-
|
11
|
+
date: 2016-01-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -86,14 +86,14 @@ dependencies:
|
|
86
86
|
requirements:
|
87
87
|
- - '='
|
88
88
|
- !ruby/object:Gem::Version
|
89
|
-
version: 0.
|
89
|
+
version: 0.6.0
|
90
90
|
type: :runtime
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
94
|
- - '='
|
95
95
|
- !ruby/object:Gem::Version
|
96
|
-
version: 0.
|
96
|
+
version: 0.6.0
|
97
97
|
description: Converts cmap exports that follow strict conventions into postgreSQL
|
98
98
|
for data analysis
|
99
99
|
email:
|
@@ -116,10 +116,10 @@ files:
|
|
116
116
|
- example/cmap_to_sql.rb
|
117
117
|
- lib/cmap.rb
|
118
118
|
- lib/cmap/edges_to_queries.rb
|
119
|
-
- lib/cmap/graph_sanitizer.rb
|
120
119
|
- lib/cmap/graph_to_sql.rb
|
121
120
|
- lib/cmap/propositions_to_graph.rb
|
122
121
|
- lib/cmap/propositions_to_sql.rb
|
122
|
+
- lib/cmap/sanitize_graph.rb
|
123
123
|
- lib/cmap/sql_runner.rb
|
124
124
|
- lib/cmap/subquery_expander.rb
|
125
125
|
- lib/cmap/version.rb
|
data/lib/cmap/graph_sanitizer.rb
DELETED
@@ -1,44 +0,0 @@
|
|
1
|
-
module Cmap; class GraphSanitizer
|
2
|
-
|
3
|
-
attr_reader :graph
|
4
|
-
|
5
|
-
def initialize(graph)
|
6
|
-
@graph = graph
|
7
|
-
@sanitized_vertex_lookup = {}
|
8
|
-
end
|
9
|
-
|
10
|
-
def sanitized_graph
|
11
|
-
DirectedGraph::Graph.new(sanitized_edges)
|
12
|
-
end
|
13
|
-
|
14
|
-
def edges_with_sanitized_vertices
|
15
|
-
graph.edges.map do |edge|
|
16
|
-
origin_vertex = sanitize_string(edge.origin_vertex)
|
17
|
-
destination_vertex = sanitize_string(edge.destination_vertex)
|
18
|
-
|
19
|
-
@sanitized_vertex_lookup[edge.origin_vertex] = origin_vertex
|
20
|
-
@sanitized_vertex_lookup[edge.destination_vertex] = destination_vertex
|
21
|
-
|
22
|
-
{origin_vertex: origin_vertex, destination_vertex: destination_vertex, value: edge.value}
|
23
|
-
end
|
24
|
-
end
|
25
|
-
|
26
|
-
def sanitize_edge_value(edge_args)
|
27
|
-
@sanitized_vertex_lookup.keys.sort.reverse.each do |original_vertex_name|
|
28
|
-
edge_args[:value].gsub!(original_vertex_name, @sanitized_vertex_lookup[original_vertex_name])
|
29
|
-
end
|
30
|
-
edge_args
|
31
|
-
end
|
32
|
-
|
33
|
-
def sanitized_edges
|
34
|
-
edges_with_sanitized_vertices.map do |edge_args|
|
35
|
-
DirectedGraph::Edge.new(sanitize_edge_value(edge_args))
|
36
|
-
end
|
37
|
-
end
|
38
|
-
|
39
|
-
def sanitize_string(string)
|
40
|
-
string.gsub(/[^0-9a-zA-Z]+/, '_').downcase
|
41
|
-
end
|
42
|
-
|
43
|
-
end; end
|
44
|
-
|