cmap 0.4.0 → 0.5.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2c1aa04d3a2f907856c9b8ee5585bfad160f5537
4
- data.tar.gz: 73a72a40cc5dded9d995413e09c76024968b7482
3
+ metadata.gz: 260c10743062b043941c9cb50d306107fe4a7d0a
4
+ data.tar.gz: e51eb88311d3af6e68564dd1e99c01fe941fb80b
5
5
  SHA512:
6
- metadata.gz: 562e1751d18d1f84671c6b4d6137cfcb4c12ef4739157f6b51cecdb563cbf08b6d64b32d42c4e580d0e994680d6455b9de511051de89637617eba8958560adb5
7
- data.tar.gz: 42f17d5ef2eeff14a77f461c9d1f61c894296873174b9b85d396d02f5b436822aa5c644c94f79626060840b6da53b84313966c8e955459bf954f02f341b1efd0
6
+ metadata.gz: 1da1e5e4b8f95d7c64dcb9cec10c2e054a851027074a197d6fedaec33f41e05b2bf4cf5d03a34d2ca1de6050c4031b6e1a7a95e969dbb095010a33e2aa0259e1
7
+ data.tar.gz: a697dcc4d94b53ee96ee893c266316c3cf6f695e8fd173f7e02066584fed26519b6e53024897eb4a4edfee19d1893744f14fddffcc68108089bbe03025584c9c
@@ -25,5 +25,5 @@ Gem::Specification.new do |spec|
25
25
 
26
26
  spec.add_dependency "pg", "0.18.3"
27
27
  spec.add_dependency "pry"
28
- spec.add_dependency "directed_graph", '0.3.0'
28
+ spec.add_dependency "directed_graph", "0.6.0"
29
29
  end
@@ -6,7 +6,7 @@ require 'pry'
6
6
 
7
7
  require 'directed_graph'
8
8
 
9
- require_relative "./cmap/graph_sanitizer.rb"
9
+ require_relative "./cmap/sanitize_graph.rb"
10
10
  require_relative "./cmap/subquery_expander.rb"
11
11
  require_relative "./cmap/edges_to_queries.rb"
12
12
  require_relative "./cmap/propositions_to_graph.rb"
@@ -19,11 +19,11 @@ module Cmap; class EdgesToQueries
19
19
  private
20
20
 
21
21
  def unique_edges
22
- edges.uniq {|e| [e.destination_vertex, e.value]}
22
+ edges.uniq {|e| [e.destination_vertex.data[:sanitized_name], e.data[:sanitized_value]]}
23
23
  end
24
24
 
25
25
  def add_columns_queries
26
- unique_edges.map {|e| "alter table #{schema_name}.#{table_name} add column #{e.destination_vertex} int2;"}
26
+ unique_edges.map {|e| "alter table #{schema_name}.#{table_name} add column #{e.destination_vertex.data[:sanitized_name]} int2;"}
27
27
  end
28
28
 
29
29
  def grouped_edges
@@ -35,7 +35,7 @@ module Cmap; class EdgesToQueries
35
35
  end
36
36
 
37
37
  def updates
38
- u = (grouped_edges[false] || []).map {|e| "#{e.destination_vertex}=(#{e.value})::int"}.join(", ")
38
+ u = (grouped_edges[false] || []).map {|e| "#{e.destination_vertex.data[:sanitized_name]}=(#{e.data[:sanitized_value]})::int"}.join(", ")
39
39
  return [] if u.empty?
40
40
  ["update #{schema_name}.#{table_name} set #{u};"]
41
41
  end
@@ -4,22 +4,35 @@ module Cmap; class PropositionsToGraph
4
4
 
5
5
  def initialize(propositions_path)
6
6
  @propositions_path = propositions_path
7
+ @vertices = []
7
8
  end
8
9
 
9
10
  def graph
10
- DirectedGraph::Graph.new(edges)
11
+ SanitizeGraph.new(DirectedGraph::Graph.new(edges)).sanitize
11
12
  end
12
13
 
13
14
  private
14
15
 
15
16
  def edges
16
17
  @edges ||= propositions.inject([]) do |memo, e|
17
- origin_vertex, value, destination_vertex = e
18
- memo << DirectedGraph::Edge.new(origin_vertex: origin_vertex, destination_vertex: destination_vertex, value: value)
18
+ origin_vertex_name, value, destination_vertex_name = e
19
+ origin_vertex = find_vertex_or_create(origin_vertex_name)
20
+ destination_vertex = find_vertex_or_create(destination_vertex_name)
21
+ data = { :value => value }
22
+ memo << DirectedGraph::Edge.new(origin_vertex: origin_vertex, destination_vertex: destination_vertex, data: data)
19
23
  memo
20
24
  end
21
25
  end
22
26
 
27
+ def find_vertex_or_create(vertex_name)
28
+ vertex = @vertices.find {|v| v.name == vertex_name}
29
+ return vertex if vertex
30
+ data = { :name => vertex_name }
31
+ v = DirectedGraph::Vertex.new(name: vertex_name, data: data)
32
+ @vertices << v
33
+ v
34
+ end
35
+
23
36
  def propositions
24
37
  csv_path = File.expand_path(propositions_path, File.dirname(__FILE__))
25
38
  CSV.read(csv_path, { :col_sep => "\t", :quote_char => '"' })
@@ -21,12 +21,8 @@ module Cmap; class PropositionsToSql
21
21
  propositions_to_graph.graph
22
22
  end
23
23
 
24
- def sanitized_graph
25
- GraphSanitizer.new(raw_graph).sanitized_graph
26
- end
27
-
28
24
  def graph_to_sql
29
- GraphToSql.new(table_name, schema_name, sanitized_graph, subquery_gsubs)
25
+ GraphToSql.new(table_name, schema_name, raw_graph, subquery_gsubs)
30
26
  end
31
27
 
32
28
  end; end
@@ -0,0 +1,42 @@
1
+ module Cmap; class SanitizeGraph
2
+
3
+ def initialize(graph)
4
+ @graph = graph
5
+ end
6
+
7
+ def sanitize
8
+ sanitize_vertices
9
+ sanitize_edges
10
+ @graph
11
+ end
12
+
13
+ private
14
+
15
+ def sanitize_vertices
16
+ @graph.vertices.each { |v| v.data[:sanitized_name] = sanitize_string(v.data[:name]) }
17
+ end
18
+
19
+ def ordered_vertices
20
+ # HACK: this sorting makes the assumption vertices lower in the graph have longer ids
21
+ @ordered_vertices ||= @graph.vertices.sort do |a, b|
22
+ b.name.length <=> a.name.length
23
+ end
24
+ end
25
+
26
+ def sanitize_edges
27
+ @graph.edges.each { |edge| sanitize_edge(edge) }
28
+ end
29
+
30
+ def sanitize_edge(edge)
31
+ ordered_vertices.each do |v|
32
+ edge.data[:sanitized_value] = edge.data[:value].gsub(v.data[:name], v.data[:sanitized_name])
33
+ end
34
+ end
35
+
36
+ private
37
+
38
+ def sanitize_string(string)
39
+ string.gsub(/[^0-9a-zA-Z]+/, '_').downcase
40
+ end
41
+
42
+ end; end
@@ -9,12 +9,17 @@ module Cmap; class SubqueryExpander
9
9
  end
10
10
 
11
11
  def update_query?(edge)
12
- edge.value != query(edge)
12
+ edge.data[:sanitized_value] != query(edge)
13
13
  end
14
14
 
15
15
  def query(edge)
16
- r = edge.value
17
- replacements = [["+table_name+", table_name], ["+schema_name+", schema_name], ["+destination_vertex+", edge.destination_vertex], ["+origin_vertex+", edge.origin_vertex]]
16
+ r = edge.data[:sanitized_value]
17
+ replacements = [
18
+ ["+table_name+", table_name],
19
+ ["+schema_name+", schema_name],
20
+ ["+destination_vertex+", edge.destination_vertex.data[:sanitized_name]],
21
+ ["+origin_vertex+", edge.origin_vertex.data[:sanitized_name]]
22
+ ]
18
23
  (subquery_gsubs + replacements).each {|gsub| r = r.gsub(*gsub)}
19
24
  r
20
25
  end
@@ -1,3 +1,3 @@
1
1
  module Cmap
2
- VERSION = "0.4.0"
2
+ VERSION = "0.5.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cmap
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - MrPowers
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-01-04 00:00:00.000000000 Z
11
+ date: 2016-01-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -86,14 +86,14 @@ dependencies:
86
86
  requirements:
87
87
  - - '='
88
88
  - !ruby/object:Gem::Version
89
- version: 0.3.0
89
+ version: 0.6.0
90
90
  type: :runtime
91
91
  prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
94
  - - '='
95
95
  - !ruby/object:Gem::Version
96
- version: 0.3.0
96
+ version: 0.6.0
97
97
  description: Converts cmap exports that follow strict conventions into postgreSQL
98
98
  for data analysis
99
99
  email:
@@ -116,10 +116,10 @@ files:
116
116
  - example/cmap_to_sql.rb
117
117
  - lib/cmap.rb
118
118
  - lib/cmap/edges_to_queries.rb
119
- - lib/cmap/graph_sanitizer.rb
120
119
  - lib/cmap/graph_to_sql.rb
121
120
  - lib/cmap/propositions_to_graph.rb
122
121
  - lib/cmap/propositions_to_sql.rb
122
+ - lib/cmap/sanitize_graph.rb
123
123
  - lib/cmap/sql_runner.rb
124
124
  - lib/cmap/subquery_expander.rb
125
125
  - lib/cmap/version.rb
@@ -1,44 +0,0 @@
1
- module Cmap; class GraphSanitizer
2
-
3
- attr_reader :graph
4
-
5
- def initialize(graph)
6
- @graph = graph
7
- @sanitized_vertex_lookup = {}
8
- end
9
-
10
- def sanitized_graph
11
- DirectedGraph::Graph.new(sanitized_edges)
12
- end
13
-
14
- def edges_with_sanitized_vertices
15
- graph.edges.map do |edge|
16
- origin_vertex = sanitize_string(edge.origin_vertex)
17
- destination_vertex = sanitize_string(edge.destination_vertex)
18
-
19
- @sanitized_vertex_lookup[edge.origin_vertex] = origin_vertex
20
- @sanitized_vertex_lookup[edge.destination_vertex] = destination_vertex
21
-
22
- {origin_vertex: origin_vertex, destination_vertex: destination_vertex, value: edge.value}
23
- end
24
- end
25
-
26
- def sanitize_edge_value(edge_args)
27
- @sanitized_vertex_lookup.keys.sort.reverse.each do |original_vertex_name|
28
- edge_args[:value].gsub!(original_vertex_name, @sanitized_vertex_lookup[original_vertex_name])
29
- end
30
- edge_args
31
- end
32
-
33
- def sanitized_edges
34
- edges_with_sanitized_vertices.map do |edge_args|
35
- DirectedGraph::Edge.new(sanitize_edge_value(edge_args))
36
- end
37
- end
38
-
39
- def sanitize_string(string)
40
- string.gsub(/[^0-9a-zA-Z]+/, '_').downcase
41
- end
42
-
43
- end; end
44
-