cabbage 0.1.3 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.rdoc CHANGED
@@ -1,10 +1,11 @@
1
1
  = cabbage
2
2
 
3
- Cabbage is a simple gem that parses Graphviz DOT files.
3
+ Cabbage is a simple library of parsers. Currently, it parses Graphviz DOT files and emails. The goal is maximum ease of use and simplicity.
4
4
 
5
5
  use:
6
- install the gem named 'cabbage', and parse a dotfile by invoking 'Cabbage.dotfile(arg)' in your script.
7
- (where 'arg' is either a string containing either the path to a dotfile or a dot-formatted string)
6
+ install the gem named 'cabbage', and parse a dotfile by invoking 'Cabbage.dotfile("dotfile")' in your script.
7
+ (where "dotfile" is a string containing either the path to a dotfile or a dot-formatted string)
8
+ Similarly, Cabbage.email("email_file") will parse an email. More parsers to come as I have need of them.
8
9
 
9
10
  == Contributing to cabbage
10
11
  Send me a message if you feel the urge to contribute.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.3
1
+ 0.2.0
data/cabbage.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "cabbage"
8
- s.version = "0.1.3"
8
+ s.version = "0.2.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Josh Lauer"]
12
- s.date = "2011-09-24"
12
+ s.date = "2011-09-26"
13
13
  s.description = "More to come."
14
14
  s.email = "josh.lauer75@gmail.com"
15
15
  s.extra_rdoc_files = [
@@ -26,6 +26,11 @@ Gem::Specification.new do |s|
26
26
  "VERSION",
27
27
  "cabbage.gemspec",
28
28
  "lib/cabbage.rb",
29
+ "lib/cabbage/dotfile/parser.rb",
30
+ "lib/cabbage/email/email.rb",
31
+ "lib/cabbage/email/mime_part.rb",
32
+ "lib/cabbage/email/parser.rb",
33
+ "lib/cabbage/string_extras.rb",
29
34
  "test/helper.rb",
30
35
  "test/test_cabbage.rb"
31
36
  ]
data/lib/cabbage.rb CHANGED
@@ -1,138 +1,22 @@
1
- #!/usr/bin/env ruby
2
- # encoding: UTF-8
3
1
  module Cabbage
4
2
 
5
- # just pass calls to new on to DotFile class for now
6
- def self.new(*args, &block)
7
- DotFile.new(args[0], &block) # passes the the first argument on
8
- end
3
+ # # You can't instantiate a cabbage. (yet)
4
+ # def self.new(*args, &block)
5
+ #
6
+ # end
9
7
 
8
+ # graphviz DOT format files
10
9
  def self.dotfile(*args, &block)
10
+ require_relative "cabbage/dotfile/parser"
11
11
  DotFile.new(args[0], &block) # passes the the first argument on
12
12
  end
13
13
 
14
- class DotFile
15
-
16
- # pass it a string containing either the DotFile itself, or the path to
17
- # a DOTfile.
18
- def initialize(source = nil)
19
- @raw_dotfile = "" # unparsed DOTfile
20
- @graph_type = "" #
21
- @title = ""
22
- @header = {}
23
- @nodes = []
24
- @connections = []
25
- source != nil if parse(source)
26
- end
27
-
28
- attr_accessor :raw_dotfile, :graph_type, :title, :header, :nodes, :connections
29
-
30
- # no public methods yet apart from accessors
31
-
32
- # parsing methods below
33
- private
34
-
35
- def load_from_file(dotfile_path)
36
- @raw_dotfile = IO.read(dotfile_path)
37
- end
38
-
39
- def load_from_string(dotfile)
40
- @raw_dotfile = dotfile
41
- end
42
-
43
- def parse(source = nil)
44
- begin
45
- if source.class == String
46
- if source.include?("\n")
47
- load_from_string(source)
48
- else
49
- load_from_file(source)
50
- end
51
- parse_dotfile()
52
- elsif source
53
- raise
54
- end
55
- rescue
56
- puts 'Unhandled parser exception! Parse failed.'
57
- end
58
- end
59
-
60
- # a dotfile has four components:
61
- # graph_type, header, nodes, connections
62
- def parse_dotfile
63
- # the chunk is everything inside '{}'
64
- raw_chunk = @raw_dotfile.split("{")[1].split("}")[0].strip
65
- # pull out the header
66
- raw_header = raw_chunk.match(/([\w\s*=".,\s\[\]_\\]+;)*/m)[0]
67
- # find body by chopping header off chunk
68
- raw_body = raw_chunk.sub(raw_header, "")
69
- # split the body on '>];', which delimits the tables section
70
- raw_connections = raw_body.split(">];")[-1].strip
71
- # split out the tables section from the body
72
- raw_tables = raw_body.split(">];")[0 .. -2].join(">];").strip + " \n>];"
73
-
74
- # assemble the output hash
75
- @graph_type = @raw_dotfile.match(/\A\s*((?:di)?graph)/)[1]
76
- @title = @raw_dotfile.match(/\A\s*(?:di)?graph\s*(\w+)/)[1]
77
- @header = parse_header(raw_header, ";")
78
- @nodes = parse_nodes(raw_tables)
79
- @connections = parse_connections(raw_connections)
80
- end
81
-
82
- def parse_header(raw_header, delimiter)
83
- temp = {}
84
- raw_header.scan(/(\w+)(?:\s*=?\s*)(?:["|\[](.+?)["|\]]#{delimiter})/m).each do |n|
85
- if n[1].include?("=")
86
- temp[n[0]] = parse_header(n[1], ",")
87
- else
88
- temp[n[0]] = n[1].strip
89
- end
90
- end
91
- return temp
92
- end
93
-
94
- def chop_tables(raw_tables)
95
- {}.tap do |output|
96
- raw_tables.scan(/\s*\"*([\w:]+)\"*\s*\[\w+\s*=\s*<(.+?)>\];/m).each do |n|
97
- output[ n[0].gsub('\"', '').strip ] = n[1].strip
98
- end
99
- end
100
- end
101
-
102
- def parse_nodes(raw_tables)
103
- result = []
104
- chop_tables(raw_tables).each do |name, table|
105
- node = {:name => name.sub("m_", "")}
106
- node[:fields] = []
107
- if table.include?("|")
108
- table.split("|")[1].scan(/port="([\w:]+)">[^<]+<[^>]+>(.+?)</m).each do |pair|
109
- node[:fields] << { :name => pair[0], :type => pair[1] }
110
- end
111
- end
112
- result << node
113
- end
114
- result
115
- end
116
-
117
- def parse_connections(node_chunk)
118
- output = []
119
- node_chunk.split("\n").each do |this_line|
120
- this_connection = {}
121
- temp = this_line.split("->")
122
- this_connection[:start_node] = temp[0].gsub('"', '').gsub('\\', '').sub('m_', '').strip
123
- this_connection[:end_node] = temp[1].split("[")[0].gsub('"', '').gsub('\\', '').sub('m_', '').strip
124
- tokens = temp[1].split("[")[1].split("]")[0].split(",")
125
- tokens.each do |token_string|
126
- token_pair = token_string.split("=")
127
- this_connection[token_pair[0].strip.gsub('"', '').gsub('\\', '').to_sym] = token_pair[1].strip.gsub('"', '').gsub('\\', '')
128
- end
129
- output << this_connection
130
- end
131
- return output
132
- end
133
-
134
-
14
+ # raw emails
15
+ def self.email(*args, &block)
16
+ require_relative "cabbage/email/email"
17
+ require_relative "cabbage/email/parser"
18
+ require_relative "cabbage/string_extras"
19
+ Email.new(args[0], &block)
135
20
  end
136
21
 
137
- end
138
-
22
+ end
@@ -0,0 +1,125 @@
1
+ # encoding: UTF-8
2
+ module Cabbage
3
+
4
+ class DotFile
5
+
6
+ # pass it a string containing either the DotFile itself, or the path to
7
+ # a DOTfile.
8
+ def initialize(source = nil)
9
+ @raw_dotfile = "" # unparsed DOTfile
10
+ @graph_type = "" #
11
+ @title = ""
12
+ @header = {}
13
+ @nodes = []
14
+ @connections = []
15
+ source != nil if parse(source)
16
+ end
17
+
18
+ attr_accessor :raw_dotfile, :graph_type, :title, :header, :nodes, :connections
19
+
20
+ # no public methods yet apart from accessors
21
+
22
+ # parsing methods below
23
+ private
24
+
25
+ def load_from_file(dotfile_path)
26
+ @raw_dotfile = IO.read(dotfile_path)
27
+ end
28
+
29
+ def load_from_string(dotfile)
30
+ @raw_dotfile = dotfile
31
+ end
32
+
33
+ def parse(source = nil)
34
+ begin
35
+ if source.class == String
36
+ if source.include?("\n")
37
+ load_from_string(source)
38
+ else
39
+ load_from_file(source)
40
+ end
41
+ parse_dotfile()
42
+ elsif source
43
+ raise
44
+ end
45
+ rescue
46
+ puts 'Unhandled parser exception! Parse failed.'
47
+ end
48
+ end
49
+
50
+ # a dotfile has four components:
51
+ # graph_type, header, nodes, connections
52
+ def parse_dotfile
53
+ # the chunk is everything inside '{}'
54
+ raw_chunk = @raw_dotfile.split("{")[1].split("}")[0].strip
55
+ # pull out the header
56
+ raw_header = raw_chunk.match(/([\w\s*=".,\s\[\]_\\]+;)*/m)[0]
57
+ # find body by chopping header off chunk
58
+ raw_body = raw_chunk.sub(raw_header, "")
59
+ # split the body on '>];', which delimits the tables section
60
+ raw_connections = raw_body.split(">];")[-1].strip
61
+ # split out the tables section from the body
62
+ raw_tables = raw_body.split(">];")[0 .. -2].join(">];").strip + " \n>];"
63
+
64
+ # assemble the output hash
65
+ @graph_type = @raw_dotfile.match(/\A\s*((?:di)?graph)/)[1]
66
+ @title = @raw_dotfile.match(/\A\s*(?:di)?graph\s*(\w+)/)[1]
67
+ @header = parse_header(raw_header, ";")
68
+ @nodes = parse_nodes(raw_tables)
69
+ @connections = parse_connections(raw_connections)
70
+ end
71
+
72
+ def parse_header(raw_header, delimiter)
73
+ temp = {}
74
+ raw_header.scan(/(\w+)(?:\s*=?\s*)(?:["|\[](.+?)["|\]]#{delimiter})/m).each do |n|
75
+ if n[1].include?("=")
76
+ temp[n[0]] = parse_header(n[1], ",")
77
+ else
78
+ temp[n[0]] = n[1].strip
79
+ end
80
+ end
81
+ return temp
82
+ end
83
+
84
+ def chop_tables(raw_tables)
85
+ {}.tap do |output|
86
+ raw_tables.scan(/\s*\"*([\w:]+)\"*\s*\[\w+\s*=\s*<(.+?)>\];/m).each do |n|
87
+ output[ n[0].gsub('\"', '').strip ] = n[1].strip
88
+ end
89
+ end
90
+ end
91
+
92
+ def parse_nodes(raw_tables)
93
+ result = []
94
+ chop_tables(raw_tables).each do |name, table|
95
+ node = {:name => name.sub("m_", "")}
96
+ node[:fields] = []
97
+ if table.include?("|")
98
+ table.split("|")[1].scan(/port="([\w:]+)">[^<]+<[^>]+>(.+?)</m).each do |pair|
99
+ node[:fields] << { :name => pair[0], :type => pair[1] }
100
+ end
101
+ end
102
+ result << node
103
+ end
104
+ result
105
+ end
106
+
107
+ def parse_connections(node_chunk)
108
+ output = []
109
+ node_chunk.split("\n").each do |this_line|
110
+ this_connection = {}
111
+ temp = this_line.split("->")
112
+ this_connection[:start_node] = temp[0].gsub('"', '').gsub('\\', '').sub('m_', '').strip
113
+ this_connection[:end_node] = temp[1].split("[")[0].gsub('"', '').gsub('\\', '').sub('m_', '').strip
114
+ tokens = temp[1].split("[")[1].split("]")[0].split(",")
115
+ tokens.each do |token_string|
116
+ token_pair = token_string.split("=")
117
+ this_connection[token_pair[0].strip.gsub('"', '').gsub('\\', '').to_sym] = token_pair[1].strip.gsub('"', '').gsub('\\', '')
118
+ end
119
+ output << this_connection
120
+ end
121
+ return output
122
+ end
123
+ end # end DotFile class
124
+
125
+ end # end Cabbage module
@@ -0,0 +1,148 @@
1
+ require_relative "parser"
2
+ require_relative "mime_part"
3
+ module Cabbage
4
+ class Email
5
+ include EmailParser
6
+
7
+ def initialize(source = "")
8
+ @raw_source = ""
9
+ @raw_parsed = {}
10
+ @header = {}
11
+ @original_keys = {}
12
+ @parts = []
13
+ @multipart = false
14
+ if source != nil && source.class.to_s == "String"
15
+ if source.include?("\n")
16
+ @raw_source = source
17
+ parse()
18
+ else
19
+ load_and_parse(source)
20
+ end
21
+ else
22
+ raise "Bad input to Cabbage::Email#new."
23
+ end
24
+ end
25
+
26
+ attr_reader :raw_source, :raw_parsed, :header, :original_keys, :parts, :multipart
27
+
28
+ def method_missing(m, *args, &block)
29
+ if @header.keys.include?(m.intern)
30
+ @header[m.intern]
31
+ else
32
+ raise "undefined method in Cabbage::Email"
33
+ end
34
+ end
35
+
36
+ def load(filename)
37
+ open(filename) {|f| @raw_source = f.read }
38
+ end
39
+
40
+ def load_and_parse(filename)
41
+ open(filename) {|f| @raw_source = f.read }
42
+ parse()
43
+ end
44
+
45
+ def parse
46
+ if @raw_source.empty?
47
+ puts "Nothing to parse."
48
+ return false
49
+ else
50
+ @raw_parsed = EmailParser.parse_email(@raw_source)
51
+ end
52
+ @header = @raw_parsed[:header]
53
+ @original_keys = @raw_parsed[:original_keys]
54
+ if header[:content_type].include?("multipart")
55
+ @multipart = true
56
+ make_flat(@raw_parsed).each do |raw_part|
57
+ @parts << MimePart.new(raw_part)
58
+ end
59
+ else
60
+ make_flat(@raw_parsed).each do |raw_part|
61
+ @parts << MimePart.new(raw_part)
62
+ end
63
+ end
64
+ return true
65
+ end
66
+
67
+ def multipart?
68
+ @multipart
69
+ end
70
+
71
+ def [](key)
72
+ if @header.has_key?(key)
73
+ @header[key]
74
+ elsif key == :body
75
+ self.body
76
+ elsif self.mime_types.include?(key)
77
+ @parts[self.mime_types.index(key)].body
78
+ elsif key.class == FixNum
79
+
80
+ else
81
+ nil
82
+ end
83
+ end
84
+
85
+ def keys
86
+ @header.keys + [:body] + self.mime_types
87
+ end
88
+
89
+ def attachments
90
+ [].tap do |output|
91
+ @parts.each do |part|
92
+ output << part if part.attachment?
93
+ end
94
+ end
95
+ end
96
+
97
+ # returns an array of strings representing available
98
+ # mime types in the array of mime parts.
99
+ def mime_types
100
+ [].tap do |result|
101
+ @parts.each do |part|
102
+ result << part.content_type
103
+ end
104
+ end
105
+ end
106
+
107
+ def body(type = "text/plain")
108
+ if @multipart
109
+ if self.mime_types.include?(type)
110
+ @parts[self.mime_types.index(type)].body
111
+ elsif @parts.size > 0
112
+ @parts[0].body
113
+ else
114
+ nil
115
+ end
116
+ else
117
+ @raw_parsed[:body]
118
+ end
119
+ end
120
+
121
+ #####################################################################
122
+ ########################## PRIVATE METHODS ##########################
123
+ #####################################################################
124
+
125
+ private
126
+
127
+ def make_flat(tree)
128
+ results = []
129
+
130
+ if tree[:body].class == Hash
131
+ tree[:body].each do |this_part|
132
+ if this_part[:header][:content_type] =~ /^multipart/
133
+ results << make_flat(this_part)
134
+ else
135
+ results << this_part
136
+ end
137
+ end
138
+ elsif tree[:body].class == String
139
+ results << tree
140
+ else
141
+ # something went wrong
142
+ end
143
+ return results.flatten
144
+ end
145
+
146
+ end # end Message class
147
+
148
+ end # end Jmail module
@@ -0,0 +1,38 @@
1
+ module Cabbage
2
+ class MimePart
3
+
4
+ def initialize(raw_part)
5
+ @raw_source = raw_part
6
+ @header = raw_part[:header]
7
+ @original_keys = raw_part[:original_keys]
8
+ @body = raw_part[:body]
9
+ @content_type = @header[:content_type].split(";")[0].strip
10
+ end
11
+ attr_accessor :raw_source, :header, :original_keys, :body, :content_type
12
+
13
+ def method_missing(m, *args, &block)
14
+ if @header.keys.include?(m.intern)
15
+ @header[m.intern]
16
+ else
17
+ raise "undefined method in Cabbage::MimePart"
18
+ end
19
+ end
20
+
21
+ def [](key)
22
+ @header[key]
23
+ end
24
+
25
+ def keys
26
+ @header.keys
27
+ end
28
+
29
+ def attachment?
30
+ if @header.has_key?(:content_disposition) && @header[:content_disposition].start_with?("attachment")
31
+ true
32
+ else
33
+ false
34
+ end
35
+ end
36
+
37
+ end
38
+ end
@@ -0,0 +1,47 @@
1
+ module Cabbage
2
+
3
+ module EmailParser
4
+
5
+ require_relative "../string_extras"
6
+
7
+ def EmailParser.parse_email(source)
8
+ return {} if source.empty?
9
+ results = { :header => {}, :original_keys => {}, :body => [] }
10
+ divided = source.strip.match(/\A\s*(.+?)(?:\r\n\r\n|\n\n)(.+)/m)
11
+ raw_header = divided[1]
12
+ raw_body = divided[2]
13
+ raw_header.gsub(/\n\s+/, " ").split("\n").each do |this_line|
14
+ this_pair = this_line.split(/:\s*/, 2)
15
+ this_key_string = this_pair[0].strip
16
+ this_key_symbol = this_key_string.down_under.intern
17
+ this_value = this_pair[1]
18
+ if results[:header].has_key?(this_key_symbol)
19
+ unless results[:header][this_key_symbol].class == Array
20
+ results[:header][this_key_symbol] = [results[:header][this_key_symbol]]
21
+ end
22
+ results[:header][this_key_symbol] << this_value
23
+ else
24
+ results[:header][this_key_symbol] = this_value.strip
25
+ results[:original_keys][this_key_symbol] = this_key_string
26
+ end
27
+ end
28
+ if results[:header][:content_type].start_with?("multipart")
29
+ boundary = EmailParser.extract_value(results[:header][:content_type], "boundary")
30
+ EmailParser.break_by_boundary(raw_body, boundary).each {|n| results[:body] << EmailParser.parse_email(n)}
31
+ else
32
+ results[:body] = raw_body
33
+ end
34
+ results
35
+ end
36
+
37
+ def EmailParser.break_by_boundary(source, boundary)
38
+ source.split("--" + boundary).keep_if {|n| n != "--" && n != ""}.each {|n| n.strip!}
39
+ end
40
+
41
+ def EmailParser.extract_value(target_string, key)
42
+ target_string.match(/#{key}=["']?([^;\s"']+)/i)[1]
43
+ end
44
+
45
+ end # end Parser module
46
+
47
+ end
@@ -0,0 +1,18 @@
1
+ # a couple of extra methods for strings, to make things easier
2
+
3
+ class String
4
+
5
+ # if a blank? method isn't yet defined for strings, do so
6
+ # here. If run within rails, this should already be
7
+ # defined. (via ActiveSupport)
8
+ if !(String.public_method_defined? :blank?)
9
+ def blank?
10
+ self !~ /\S/
11
+ end
12
+ end
13
+
14
+ def down_under
15
+ self.gsub("-", "_").downcase
16
+ end
17
+
18
+ end # end String class additions
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cabbage
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.2.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-09-24 00:00:00.000000000Z
12
+ date: 2011-09-26 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: shoulda
16
- requirement: &15137480 !ruby/object:Gem::Requirement
16
+ requirement: &12429820 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *15137480
24
+ version_requirements: *12429820
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: bundler
27
- requirement: &15135640 !ruby/object:Gem::Requirement
27
+ requirement: &12425540 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: 1.0.0
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *15135640
35
+ version_requirements: *12425540
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: jeweler
38
- requirement: &15133500 !ruby/object:Gem::Requirement
38
+ requirement: &12420200 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: 1.6.4
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *15133500
46
+ version_requirements: *12420200
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: rcov
49
- requirement: &15128820 !ruby/object:Gem::Requirement
49
+ requirement: &12400380 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,7 +54,7 @@ dependencies:
54
54
  version: '0'
55
55
  type: :development
56
56
  prerelease: false
57
- version_requirements: *15128820
57
+ version_requirements: *12400380
58
58
  description: More to come.
59
59
  email: josh.lauer75@gmail.com
60
60
  executables: []
@@ -72,6 +72,11 @@ files:
72
72
  - VERSION
73
73
  - cabbage.gemspec
74
74
  - lib/cabbage.rb
75
+ - lib/cabbage/dotfile/parser.rb
76
+ - lib/cabbage/email/email.rb
77
+ - lib/cabbage/email/mime_part.rb
78
+ - lib/cabbage/email/parser.rb
79
+ - lib/cabbage/string_extras.rb
75
80
  - test/helper.rb
76
81
  - test/test_cabbage.rb
77
82
  homepage: http://github.com/josh-lauer/cabbage
@@ -89,7 +94,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
89
94
  version: '0'
90
95
  segments:
91
96
  - 0
92
- hash: 1830193836739110796
97
+ hash: 634790544501283345
93
98
  required_rubygems_version: !ruby/object:Gem::Requirement
94
99
  none: false
95
100
  requirements: