rubyplb 0.2.4

Sign up to get free protection for your applications and to get access to all the features.
data/README.rdoc ADDED
@@ -0,0 +1,45 @@
1
+ = RubyPLB
2
+
3
+ RubyPLB generates pattern lattice graphics from lists of patterns.
4
+
5
+ == Features
6
+
7
+ * Accept a text file with any number of patterns and generate a Graphviz DOT file, or a PNG/JPG/EPS image file.
8
+ * Calculate z-scores of pattern nodes and create lattice graphs with temperature coloring applied.
9
+
10
+ == Installation
11
+
12
+ Install the gem:
13
+
14
+ $ sudo gem install rubyplb --source http://gemcutter.org
15
+
16
+ == How to Use
17
+
18
+ Usage:
19
+ rubyplb [options] <source file> <output file>
20
+
21
+ where:
22
+ <source file>
23
+ ".plb", ".txt"
24
+ <output file>
25
+ ".dot", ".png", ".jpg", or ".eps"
26
+ [options]:
27
+ --simple, -s: Use simple labels for pattern nodes
28
+ --full, -f: Generate a full pattern lattice without contracting nodes
29
+ --vertical, -v: Draw the graph from top to bottom instead of left to right
30
+ --coloring, -c: Color pattern nodes
31
+ --straight, -t: Straighten edges (available when output format is either png, jpg, or eps)
32
+ --help, -h: Show this message
33
+
34
+ == ToDo
35
+
36
+ * Multiple input formats
37
+ * Database connection capability
38
+
39
+ == Links
40
+
41
+ under construction
42
+
43
+ == Copyright
44
+
45
+ Copyright (c) 2009 Kow Kuroda and Yoichiro Hasebe. See LICENSE for details.
data/Rakefile ADDED
@@ -0,0 +1,56 @@
1
## Rakefile -- build, test, coverage and documentation tasks for rubyplb

require 'rubygems'
require 'rake'

## Packaging tasks come from Jeweler. Jeweler also defines the
## :check_dependencies task, so the :test prerequisite is only declared
## when Jeweler was actually loaded; otherwise "rake test" would abort
## because of an unknown task.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "rubyplb"
    gem.summary = %Q{RubyPLB generates pattern lattice graphics from lists of patterns.}
    gem.description = %Q{RubyPLB accepts a text file with any number of patterns and generates a Graphviz DOT file, or a PNG/JPG/EPS image file, of the pattern lattice. It can also calculate z-scores of pattern nodes and apply temperature coloring.}
    gem.email = "yohasebe@gmail.com"
    gem.homepage = "http://github.com/yohasebe/rubyplb"
    gem.authors = ["Yoichiro Hasebe"]
    # bin/rubyplb requires trollop at run time.
    gem.add_dependency "trollop"
    gem.add_development_dependency "thoughtbot-shoulda"
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
  task :test => :check_dependencies
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
end

require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/*_test.rb'
  test.verbose = true
end

## Coverage task; replaced by a stub that explains how to install RCov
## when the library is missing.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/*_test.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

task :default => :test

## Newer Rake versions no longer ship rake/rdoctask; prefer the task class
## bundled with RDoc and fall back to the legacy one.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  # strip: keep a trailing newline of the VERSION file out of the title
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "rubyplb #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.2.4
data/bin/rubyplb ADDED
@@ -0,0 +1,105 @@
1
#!/usr/bin/env ruby

## bin/rubyplb -- command line front end of the pattern lattice builder.
##
## Reads patterns (one per line) from <source file>, inserts them into a
## PatLattice and writes the lattice to <output file> either as DOT text
## or, through Graphviz, as a png/jpg/eps image.

$:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
require 'trollop'
require 'rubyplb'
require 'ruby_graphviz'

########## parse options ##########

opts = Trollop::options do
  # Call Trollop's +version+ setter (a plain "version = ..." would only
  # create an unused local variable and leave --version unregistered).
  version "rubyplb " + File.read(File.join(File.dirname(__FILE__), "..", "VERSION")).strip
  banner <<-EOS

RubyPLB generates pattern lattice graphics from lists of patterns.

Usage:
       rubyplb [options] <source file> <output file>

where:
<source file>
       ".plb", ".txt"
<output file>
       ".dot", ".png", ".jpg", or ".eps"
[options]:
  EOS

  opt :simple, "Use simple labels for pattern nodes", :default => false
  opt :full, "Generate a full pattern lattice without contracting nodes", :default => false
  opt :vertical, "Draw the graph from top to bottom instead of left to right", :default => false
  opt :coloring, "Color pattern nodes", :default => false
  opt :straight, "Straighten edges (available when output format is either png, jpg, or eps)", :default => false
end

############### main program ###############

if ARGV.size != 2
  showerror("Input and output files are not set properly", 1)
end

filename1 = ARGV[0] # input filename
filename2 = ARGV[1] # output filename

#
# extract input and output file types (extension without the dot;
# an empty string when the name has no extension)
#
input_type  = File.extname(filename1).sub(/\A\./, "")
output_type = File.extname(filename2).sub(/\A\./, "")

if input_type !~ /\A(plb|txt)\z/ || output_type !~ /\A(dot|png|jpg|eps)\z/
  showerror("These file extensions are not (yet) supported.", 1)
end

#
# read the non-blank input lines; an unreadable file is reported through
# showerror instead of surfacing as an uncaught exception
#
begin
  sentences = File.readlines(filename1).reject { |s| s =~ /^\s*$/ }
rescue SystemCallError, IOError
  showerror("Input file does not exist.", 1)
end

if sentences.empty?
  showerror("Input file does not contain data.", 1)
end

#
# ask for confirmation of overwriting an existing file
#
if File.exist?(filename2)
  print "#{filename2} exists and will be overwritten, OK? [y/n]"
  answer = STDIN.gets # nil at end of input, which counts as "no"
  exit unless answer.to_s =~ /\A\s*y/i
end

#
# build the lattice
#
pl = PatLattice.new(opts)
sentences.each do |sentence|
  pl.insert(sentence, !opts[:full])
end

#
# create the output file
#
case output_type
when "dot"
  File.open(filename2, "w") do |f|
    f.write(pl.generate_dot)
  end
when "png", "jpg", "eps"
  pl.generate_img(filename2, output_type, opts[:straight])
end
@@ -0,0 +1,167 @@
1
+ ## lib/ruby_graphviz.rb -- graphviz dot generator library
2
+ ## Author:: Yoichiro Hasebe (mailto: yohasebe@gmail.com)
3
+ ## Copyright:: Copyright 2009 Yoichiro Hasebe
4
+ ## License:: GNU GPL version 3
5
+
6
## Minimal builder for Graphviz DOT text. Statements are appended to an
## internal buffer in call order; #to_dot returns the finished graph.
##
## Node ids and attribute values are emitted as given (no escaping is
## performed), so callers must pass ids that are valid DOT identifiers.
class RubyGraphviz

  ## Example:
  ##
  ##   g = RubyGraphviz.new("newgraph", {:rankdir => "LR", :nodesep => "0.4", :ranksep => "0.2"})
  ##
  def initialize(name, graph_hash = nil)
    @name = name
    @graph_data = graph_hash
    @nodes = []
    @edges = []
    @dot = ""
    create_graph
  end

  ## Add a subgraph to a graph (recursively)
  ##
  ## Example:
  ##
  ##   graph1.subgraph(graph2)
  ##
  def subgraph(graph)
    @dot << graph.to_dot.sub(/\Agraph/, "subgraph")
  end

  ## Set default options for nodes. Nothing is emitted when no hash is
  ## given. Returns self so calls can be chained.
  ##
  ## Example:
  ##
  ##   graph.node_default(:shape => "record", :color => "gray60")
  ##
  def node_default(node_hash = nil)
    return self if node_hash.nil?
    @dot << " node[" << attr_list(node_hash) << "];\n"
    self
  end

  ## Set default options for edges. Nothing is emitted when no hash is
  ## given. Returns self so calls can be chained.
  ##
  ## Example:
  ##
  ##   graph.edge_default(:color => "gray60")
  ##
  def edge_default(edge_hash = nil)
    return self if edge_hash.nil?
    @dot << " edge[" << attr_list(edge_hash) << "];\n"
    self
  end

  ## Create a node with its options
  ##
  ## Example:
  ##
  ##   graph.node("node-01", :label => "Node 01", :fillcolor => "pink")
  ##
  def node(node_id, node_hash = nil)
    @dot << " #{node_id}"
    @dot << " [" << attr_list(node_hash) << "]" if node_hash
    @dot << ";\n"
    self
  end

  ## Create a non-directional edge (connection line between nodes) with its options
  ##
  ## Example:
  ##
  ##   graph.edge("node-01", "node-02", :label => "connecting 1 and 2", :color => "lightblue")
  ##
  def edge(nid1, nid2, edge_hash = nil)
    @dot << create_edge("--", nid1, nid2, edge_hash) + ";\n"
    self
  end

  ## Create a directional edge (arrow from node to node) with its options
  ##
  ## Example:
  ##   graph.arrow_edge("node-01", "node-02", :label => "from 1 to 2", :color => "lightblue")
  ##
  def arrow_edge(nid1, nid2, edge_hash = nil)
    @dot << create_edge("->", nid1, nid2, edge_hash) + ";\n"
    self
  end

  ## Align nodes on the same rank connecting them with non-directional edges
  ##
  def rank(nid1, nid2, edge_hash = nil)
    @dot << "{rank=same " + create_edge("--", nid1, nid2, edge_hash) + "}\n"
    self
  end

  ## Convert graph into dot formatted data
  ##
  ## The closing brace is added to a copy of the buffer, so the method can
  ## be called repeatedly (and more statements can be added afterwards)
  ## without corrupting the graph. Quotes directly around "<...>" values
  ## are removed so that HTML-like labels are emitted unquoted.
  ##
  def to_dot
    (@dot + "}\n").gsub(/\"\</m, "<").gsub(/\>\"/m, ">")
  end

  protected

  ## Format an attribute hash as: key = "value", key = "value"
  def attr_list(attr_hash)
    attr_hash.map { |k, v| "#{k} = \"#{v}\"" }.join(", ")
  end

  ## Write the graph header and the graph-level attributes into the buffer.
  def create_graph
    @dot << "graph #{@name} {\n graph"
    @dot << " [" << attr_list(@graph_data) << "]" if @graph_data
    @dot << ";\n"
  end

  ## Append the closing brace to the buffer. No longer used by #to_dot;
  ## kept for subclasses that may call it.
  def finish_graph
    @dot << "}\n"
  end

  ## Build (without appending) an edge statement lacking its terminator.
  ## +edgetype+ is the DOT edge operator ("--" or "->").
  def create_edge(edgetype, nid1, nid2, edge_hash = nil)
    temp = " #{nid1} #{edgetype} #{nid2}"
    temp << " [" << attr_list(edge_hash) << "]" if edge_hash
    temp
  end
end
data/lib/rubyplb.rb ADDED
@@ -0,0 +1,336 @@
1
+ ## lib/rubyplb.rb -- Pattern Lattice Builder written in Ruby
2
+ ## Design:: Kow Kuroda (mailto: kuroda@nict.go.jp)
3
+ ## Program:: Yoichiro Hasebe (mailto: yohasebe@gmail.com)
4
+ ## Copyright:: Copyright 2009 Kow Kuroda and Yoichiro Hasebe
5
+ ## License:: GNU GPL version 3
6
+
7
+ $KCODE = 'utf8'
8
+
9
+ require 'ruby_graphviz'
10
+
11
## Report a problem to the user on standard error.
##
## sentence:: message text; it is inserted verbatim between a prefix and a
##            suffix, so it should end with its own punctuation
## severity:: 0 prints a warning and returns; 1 prints an error and
##            terminates the process with exit status 1; any other value
##            does nothing
def showerror(sentence, severity)
  case severity
  when 0
    $stderr.puts "Warning: #{sentence} The output may not be meaningful."
  when 1
    $stderr.puts "Error: #{sentence} No output generated."
    # non-zero status so shells and scripts can detect the failure
    exit 1
  end
end
19
+
20
class Array
  ## Return all sub-selections of the receiver (its power set by position)
  ## as an array of arrays, ordered by size (the empty selection first) and,
  ## within a size, in the order produced by Array#combination.
  ##
  ## The combinations are appended with +concat+ rather than splatted into
  ## +push+, so a long list is never passed as individual method arguments.
  def subset
    (0..length).inject([]) do |ret, n|
      ret.concat(combination(n).to_a)
    end
  end
end
27
+
28
## One pattern of the lattice.
##
## data::          the pattern as an array of tokens; "_" marks a gap
## level::         number of tokens in +data+ that are not "_"
## leaf::          true when +level+ is 0
## children::      nodes linked below this one (filled in by the lattice)
## parents::       nodes linked above this one (filled in by the lattice)
## num_instances:: occurrence counter, starting at 1
class Node
  attr_accessor :data, :children, :parents, :leaf, :level, :num_instances

  def initialize(data)
    @data = data
    @level = data.reject { |token| token == "_" }.size
    @leaf = (@level == 0)
    @children = []
    @parents = []
    @num_instances = 1
  end

  ## Sum of +num_instances+ over all direct children (0 without children).
  def children_instances
    total = 0
    @children.each { |child| total += child.num_instances }
    total
  end
end
47
+
48
## Pattern lattice: every inserted sentence is expanded into all patterns
## obtained by replacing some of its words with the gap marker "_"; the
## patterns become Node objects grouped by level (number of non-gap
## tokens) and linked to matching nodes on the adjacent levels.
##
## +opts+ is a hash-like object read with the keys :coloring, :simple and
## :vertical.
class PatLattice
  attr_accessor :levels, :root_level, :root, :leaves, :nodes

  def initialize(opts)
    @opts = opts
    @levels = []      # levels[n] => array of nodes whose level is n
    @root_level = 0
    @root = nil
    @leaves = []
    @nodes = {}       # symbol of "+"-joined tokens => Node
    @level_data = {}  # level index => statistics computed by setup_data
    @coloring = {}    # color name => patterns of the sentences tagged with it
  end

  ## Return a copy of +ary+ in which runs of consecutive elements equal to
  ## +target+ are collapsed into a single element.
  def ary_compact(ary, target = nil)
    prev = nil
    result = []
    ary.each do |n|
      next if prev == n && n == target
      prev = n
      result << n
    end
    result
  end

  ## Expand one input line into its list of patterns.
  ##
  ## A trailing "(N)" token sets the repetition count and, before it, a
  ## trailing "[color]" token sets the color under which the patterns are
  ## recorded ("gray60" when absent). With +compact+ true, consecutive gaps
  ## of a pattern are merged. Returns the pattern list repeated N times.
  def create_patterns(sentence, compact)
    # Whitespace split without an argument also drops leading blanks, so an
    # indented line does not produce an empty first word.
    words = sentence.split

    if /\((\d+)\)/ =~ words[-1]
      words.pop
      times = $1.to_i
    else
      times = 1
    end

    color = nil
    if /\[(.+)\]/ =~ words[-1]
      words.pop
      color = $1
    end

    # NOTE(review): this only rejects words containing the literal sequence
    # "[]()"; possibly a character class was intended -- confirm before
    # tightening, since that would reject input that is accepted today.
    words.each do |w|
      raise "Data contains an invalid string." if /\[\]\(\)/ =~ w
    end

    # One pattern per selection of word positions: selected positions keep
    # their word, all others become gaps.
    positions = (0...words.size).to_a
    ptns = positions.subset.map do |mask|
      ptn = positions.map { |i| mask.include?(i) ? words[i] : "_" }
      compact ? ary_compact(ptn, "_") : ptn
    end

    color ||= "gray60"
    @coloring[color] = (@coloring[color] || []) + ptns

    ptns * times
  end

  ## Look up the node registered for +pattern+ (an array of tokens);
  ## returns nil when there is none.
  def search(pattern)
    nodes[pattern.join("+").intern]
  end

  ## Insert the patterns of +sentence+ into the lattice. Patterns that are
  ## already present only get their instance counter incremented; new ones
  ## are added to their level and linked to the matching nodes one level
  ## above and one level below. Returns the current root node.
  def insert(sentence, compact)
    ptns = create_patterns(sentence, compact)

    new_nodes = []
    ptns.each do |ptn|
      if (existing = search(ptn))
        existing.num_instances += 1
      else
        node = Node.new(ptn)
        nodes[node.data.join("+").intern] = node
        new_nodes << node
      end
    end

    new_nodes.each do |node|
      level = node.level
      (levels[level] ||= []) << node

      # Link to the nodes with one concrete token less; there a gap has to
      # cover at least one character.
      uplevel = level == 0 ? nil : levels[level - 1]
      if uplevel
        node_str = node.data.join(" ")
        uplevel.each do |sup_node|
          if pattern_regexp(sup_node.data, ".+?").match(node_str)
            sup_node.children << node
            node.parents << sup_node
          end
        end
      end

      # Link to the nodes with one concrete token more; here a gap may also
      # match nothing.
      downlevel = levels[level + 1]
      if downlevel
        rgx = pattern_regexp(node.data, ".*")
        downlevel.each do |sub_node|
          if rgx.match(sub_node.data.join(" "))
            node.children << sub_node
            sub_node.parents << node
          end
        end
      end

      @leaves << node if node.leaf
    end

    @root_level = levels.size - 1
    top = levels[@root_level] # nil while nothing has been inserted yet
    @root = top ? top.first : nil
  end

  ## Yield every node, level by level.
  def traverse(&block)
    levels.each do |level|
      level.each do |node|
        yield node
      end
    end
  end

  ## Compute per-level statistics over the nodes whose children carry at
  ## least one instance: their count, and the mean and (population)
  ## standard deviation of their children_instances.
  def setup_data
    levels.each_with_index do |level, l_index|
      counts = level.map { |node| node.children_instances }.reject { |c| c == 0 }
      if counts.empty?
        @level_data[l_index] = {:num_nodes_non_terminal => 0,
                                :avg_num_children => 0,
                                :std_dev_num_children => 0.0}
      else
        avg = counts.inject(0) { |sum, c| sum + c }.to_f / counts.size
        sq_sum = counts.inject(0) { |sum, c| (c - avg) ** 2 + sum }
        @level_data[l_index] = {:num_nodes_non_terminal => counts.size,
                                :avg_num_children => avg,
                                :std_dev_num_children => Math.sqrt(sq_sum / counts.size)}
      end
    end
  end

  ## Build the HTML-like table label of +node+. The z-score of the node's
  ## children_instances within its level selects the background color when
  ## coloring is enabled, and is printed in a second row unless simple
  ## labels were requested. Requires setup_data to have run whenever
  ## coloring or non-simple labels are in effect.
  def create_nodelabel(node)
    has_children = node.level != 0 && node.children_instances > 0

    # Start from 0.0 so the rounding below never operates on nil (simple
    # labels without coloring skip the statistics entirely).
    zscore = 0.0
    if (@opts[:coloring] || !@opts[:simple]) && has_children
      ldata = @level_data[node.level]
      dev = node.children_instances - ldata[:avg_num_children]
      zscore = dev / ldata[:std_dev_num_children]
      # a zero standard deviation yields a non-finite quotient
      zscore = 0.0 if zscore.nan? || zscore.infinite?
    end

    # Values "2".."11" are color numbers (create_node sets the
    # "rdylbu11" colorscheme on every node).
    color = "#ffffff"
    if @opts[:coloring] && zscore != 0.0
      color = if zscore >= 3.0 then "2"
              elsif zscore >= 2.0 then "3"
              elsif zscore >= 1.5 then "4"
              elsif zscore >= 1.0 then "5"
              elsif zscore > 0.5 then "6"
              elsif zscore >= 0.0 then "7"
              elsif zscore >= -0.5 then "8"
              elsif zscore >= -1.0 then "9"
              elsif zscore >= -1.5 then "10"
              else "11"
              end
    end

    zscore = (zscore * 100).round / 100.0
    border = "0"
    pat_str = node.data.collect { |td| "<td color='black'>#{td}</td>" }.join
    pat_str = "&nbsp;" * 5 if pat_str == ""
    label = "<<table bgcolor='#{color}' border='#{border}' cellborder='1' cellspacing='0' cellpadding='5'>" +
            "<tr>#{pat_str}</tr>"
    unless @opts[:simple]
      label += "<tr><td color='black' colspan='#{node.data.size}'> "
      label += node.children_instances.to_s + " (" + zscore.to_s + ")" if has_children
      label += "</td></tr>"
    end
    label + "</table>>"
  end

  ## Add one lattice node to +graph+ with the fixed node attributes.
  def create_node(graph, node_id, node_label)
    graph.node(node_id, :label => node_label, :shape => "plaintext",
               :height => "0.0", :width => "0.0",
               :margin => "0.0", :colorscheme => "rdylbu11", :URL => node_id)
  end

  ## Render the lattice as DOT text: each node is declared once and one
  ## edge is drawn per parent/child link. With coloring enabled an edge
  ## carries every color whose pattern list contains both end points.
  def generate_dot
    setup_data if @opts[:coloring] || !@opts[:simple]
    drawn = {} # object_id => true for nodes that are already declared
    rankdir = @opts[:vertical] ? "" : "LR"
    plb = RubyGraphviz.new("plb", :rankdir => rankdir, :nodesep => "0.8", :ranksep => "0.8")
    levels.each do |level|
      level.each do |node|
        node_id = draw_node_once(plb, node, drawn)
        node.children.each do |cnode|
          cnode_id = draw_node_once(plb, cnode, drawn)
          if @opts[:coloring]
            colors = []
            @coloring.each do |color, val|
              colors << color if val.index(node.data) && val.index(cnode.data)
            end
          else
            colors = ["black"]
          end
          plb.edge(node_id, cnode_id, :color => colors.join(":"))
        end
      end
    end
    plb.to_dot.gsub(/\"\</m, "<").gsub(/\>\"/m, ">")
  end

  ## Render the lattice through Graphviz into +outfile+. +image_type+ is
  ## passed to the -T option; with +straight_line+ the layout is piped
  ## through "neato -n". Graphviz diagnostics go to rubyplb.log in the
  ## current directory. Reports an error through showerror when the dot
  ## program cannot be run.
  def generate_img(outfile, image_type, straight_line = false)
    require 'shellwords' # only needed here; keeps the class self-contained

    dot = generate_dot
    isthere_dot = `dot -V 2>&1`
    if isthere_dot !~ /dot.*version/i
      showerror("Graphviz's dot program cannot be found.", 1)
    else
      # The command runs through the shell, so quote the user-supplied parts.
      target = "-T#{Shellwords.escape(image_type.to_s)} -o#{Shellwords.escape(outfile.to_s)}"
      if straight_line
        cmd = "dot | neato -n #{target} 2>rubyplb.log"
      else
        cmd = "dot #{target} 2>rubyplb.log"
      end
      IO.popen(cmd, 'r+') do |io|
        io.puts dot
      end
    end
  end

  private

  ## Anchored regexp for a pattern: each "_" token becomes +wildcard+ and
  ## every other token is matched literally (regexp metacharacters in
  ## words are escaped).
  def pattern_regexp(data, wildcard)
    body = data.map { |w| w == "_" ? wildcard : Regexp.escape(w) }.join(" ")
    Regexp.new("\\A" + body + "\\z")
  end

  ## Declare +node+ in +graph+ unless it was declared before; returns the
  ## id used for the node in the graph.
  def draw_node_once(graph, node, drawn)
    id = node.object_id
    unless drawn[id]
      create_node(graph, id, create_nodelabel(node))
      drawn[id] = true
    end
    id
  end
end