cabbage 0.1.3 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.rdoc CHANGED
@@ -1,10 +1,11 @@
1
1
  = cabbage
2
2
 
3
- Cabbage is a simple gem that parses Graphviz DOT files.
3
+ Cabbage is a simple library of parsers. Currently, it parses Graphviz DOT files and emails. The goal is maximum ease of use and simplicity.
4
4
 
5
5
  use:
6
- install the gem named 'cabbage', and parse a dotfile by invoking 'Cabbage.dotfile(arg)' in your script.
7
- (where 'arg' is either a string containing either the path to a dotfile or a dot-formatted string)
6
+ install the gem named 'cabbage', and parse a dotfile by invoking 'Cabbage.dotfile("dotfile")' in your script.
7
+ (where "dotfile" is a string containing either the path to a dotfile or a dot-formatted string)
8
+ Similarly, Cabbage.email("email_file") will parse an email. More parsers to come as I have need of them.
8
9
 
9
10
  == Contributing to cabbage
10
11
  Send me a message if you feel the urge to contribute.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.3
1
+ 0.2.0
data/cabbage.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "cabbage"
8
- s.version = "0.1.3"
8
+ s.version = "0.2.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Josh Lauer"]
12
- s.date = "2011-09-24"
12
+ s.date = "2011-09-26"
13
13
  s.description = "More to come."
14
14
  s.email = "josh.lauer75@gmail.com"
15
15
  s.extra_rdoc_files = [
@@ -26,6 +26,11 @@ Gem::Specification.new do |s|
26
26
  "VERSION",
27
27
  "cabbage.gemspec",
28
28
  "lib/cabbage.rb",
29
+ "lib/cabbage/dotfile/parser.rb",
30
+ "lib/cabbage/email/email.rb",
31
+ "lib/cabbage/email/mime_part.rb",
32
+ "lib/cabbage/email/parser.rb",
33
+ "lib/cabbage/string_extras.rb",
29
34
  "test/helper.rb",
30
35
  "test/test_cabbage.rb"
31
36
  ]
data/lib/cabbage.rb CHANGED
@@ -1,138 +1,22 @@
1
- #!/usr/bin/env ruby
2
- # encoding: UTF-8
3
1
  module Cabbage
4
2
 
5
- # just pass calls to new on to DotFile class for now
6
- def self.new(*args, &block)
7
- DotFile.new(args[0], &block) # passes the the first argument on
8
- end
3
+ # # You can't instantiate a cabbage. (yet)
4
+ # def self.new(*args, &block)
5
+ #
6
+ # end
9
7
 
8
+ # graphviz DOT format files
10
9
  def self.dotfile(*args, &block)
10
+ require_relative "cabbage/dotfile/parser"
11
11
  DotFile.new(args[0], &block) # passes the first argument on
12
12
  end
13
13
 
14
- class DotFile
15
-
16
- # pass it a string containing either the DotFile itself, or the path to
17
- # a DOTfile.
18
- def initialize(source = nil)
19
- @raw_dotfile = "" # unparsed DOTfile
20
- @graph_type = "" #
21
- @title = ""
22
- @header = {}
23
- @nodes = []
24
- @connections = []
25
- source != nil if parse(source)
26
- end
27
-
28
- attr_accessor :raw_dotfile, :graph_type, :title, :header, :nodes, :connections
29
-
30
- # no public methods yet apart from accessors
31
-
32
- # parsing methods below
33
- private
34
-
35
- def load_from_file(dotfile_path)
36
- @raw_dotfile = IO.read(dotfile_path)
37
- end
38
-
39
- def load_from_string(dotfile)
40
- @raw_dotfile = dotfile
41
- end
42
-
43
- def parse(source = nil)
44
- begin
45
- if source.class == String
46
- if source.include?("\n")
47
- load_from_string(source)
48
- else
49
- load_from_file(source)
50
- end
51
- parse_dotfile()
52
- elsif source
53
- raise
54
- end
55
- rescue
56
- puts 'Unhandled parser exception! Parse failed.'
57
- end
58
- end
59
-
60
- # a dotfile has four components:
61
- # graph_type, header, nodes, connections
62
- def parse_dotfile
63
- # the chunk is everything inside '{}'
64
- raw_chunk = @raw_dotfile.split("{")[1].split("}")[0].strip
65
- # pull out the header
66
- raw_header = raw_chunk.match(/([\w\s*=".,\s\[\]_\\]+;)*/m)[0]
67
- # find body by chopping header off chunk
68
- raw_body = raw_chunk.sub(raw_header, "")
69
- # split the body on '>];', which delimits the tables section
70
- raw_connections = raw_body.split(">];")[-1].strip
71
- # split out the tables section from the body
72
- raw_tables = raw_body.split(">];")[0 .. -2].join(">];").strip + " \n>];"
73
-
74
- # assemble the output hash
75
- @graph_type = @raw_dotfile.match(/\A\s*((?:di)?graph)/)[1]
76
- @title = @raw_dotfile.match(/\A\s*(?:di)?graph\s*(\w+)/)[1]
77
- @header = parse_header(raw_header, ";")
78
- @nodes = parse_nodes(raw_tables)
79
- @connections = parse_connections(raw_connections)
80
- end
81
-
82
- def parse_header(raw_header, delimiter)
83
- temp = {}
84
- raw_header.scan(/(\w+)(?:\s*=?\s*)(?:["|\[](.+?)["|\]]#{delimiter})/m).each do |n|
85
- if n[1].include?("=")
86
- temp[n[0]] = parse_header(n[1], ",")
87
- else
88
- temp[n[0]] = n[1].strip
89
- end
90
- end
91
- return temp
92
- end
93
-
94
- def chop_tables(raw_tables)
95
- {}.tap do |output|
96
- raw_tables.scan(/\s*\"*([\w:]+)\"*\s*\[\w+\s*=\s*<(.+?)>\];/m).each do |n|
97
- output[ n[0].gsub('\"', '').strip ] = n[1].strip
98
- end
99
- end
100
- end
101
-
102
- def parse_nodes(raw_tables)
103
- result = []
104
- chop_tables(raw_tables).each do |name, table|
105
- node = {:name => name.sub("m_", "")}
106
- node[:fields] = []
107
- if table.include?("|")
108
- table.split("|")[1].scan(/port="([\w:]+)">[^<]+<[^>]+>(.+?)</m).each do |pair|
109
- node[:fields] << { :name => pair[0], :type => pair[1] }
110
- end
111
- end
112
- result << node
113
- end
114
- result
115
- end
116
-
117
- def parse_connections(node_chunk)
118
- output = []
119
- node_chunk.split("\n").each do |this_line|
120
- this_connection = {}
121
- temp = this_line.split("->")
122
- this_connection[:start_node] = temp[0].gsub('"', '').gsub('\\', '').sub('m_', '').strip
123
- this_connection[:end_node] = temp[1].split("[")[0].gsub('"', '').gsub('\\', '').sub('m_', '').strip
124
- tokens = temp[1].split("[")[1].split("]")[0].split(",")
125
- tokens.each do |token_string|
126
- token_pair = token_string.split("=")
127
- this_connection[token_pair[0].strip.gsub('"', '').gsub('\\', '').to_sym] = token_pair[1].strip.gsub('"', '').gsub('\\', '')
128
- end
129
- output << this_connection
130
- end
131
- return output
132
- end
133
-
134
-
14
+ # raw emails
15
+ def self.email(*args, &block)
16
+ require_relative "cabbage/email/email"
17
+ require_relative "cabbage/email/parser"
18
+ require_relative "cabbage/string_extras"
19
+ Email.new(args[0], &block)
135
20
  end
136
21
 
137
- end
138
-
22
+ end
@@ -0,0 +1,125 @@
1
+ # encoding: UTF-8
2
+ module Cabbage
3
+
4
+ class DotFile
5
+
6
+ # pass it a string containing either the DotFile itself, or the path to
7
+ # a DOTfile.
8
+ def initialize(source = nil)
9
+ @raw_dotfile = "" # unparsed DOTfile
10
+ @graph_type = "" #
11
+ @title = ""
12
+ @header = {}
13
+ @nodes = []
14
+ @connections = []
15
+ source != nil if parse(source)
16
+ end
17
+
18
+ attr_accessor :raw_dotfile, :graph_type, :title, :header, :nodes, :connections
19
+
20
+ # no public methods yet apart from accessors
21
+
22
+ # parsing methods below
23
+ private
24
+
25
+ def load_from_file(dotfile_path)
26
+ @raw_dotfile = IO.read(dotfile_path)
27
+ end
28
+
29
+ def load_from_string(dotfile)
30
+ @raw_dotfile = dotfile
31
+ end
32
+
33
+ def parse(source = nil)
34
+ begin
35
+ if source.class == String
36
+ if source.include?("\n")
37
+ load_from_string(source)
38
+ else
39
+ load_from_file(source)
40
+ end
41
+ parse_dotfile()
42
+ elsif source
43
+ raise
44
+ end
45
+ rescue
46
+ puts 'Unhandled parser exception! Parse failed.'
47
+ end
48
+ end
49
+
50
+ # a dotfile has four components:
51
+ # graph_type, header, nodes, connections
52
+ def parse_dotfile
53
+ # the chunk is everything inside '{}'
54
+ raw_chunk = @raw_dotfile.split("{")[1].split("}")[0].strip
55
+ # pull out the header
56
+ raw_header = raw_chunk.match(/([\w\s*=".,\s\[\]_\\]+;)*/m)[0]
57
+ # find body by chopping header off chunk
58
+ raw_body = raw_chunk.sub(raw_header, "")
59
+ # split the body on '>];', which delimits the tables section
60
+ raw_connections = raw_body.split(">];")[-1].strip
61
+ # split out the tables section from the body
62
+ raw_tables = raw_body.split(">];")[0 .. -2].join(">];").strip + " \n>];"
63
+
64
+ # assemble the output hash
65
+ @graph_type = @raw_dotfile.match(/\A\s*((?:di)?graph)/)[1]
66
+ @title = @raw_dotfile.match(/\A\s*(?:di)?graph\s*(\w+)/)[1]
67
+ @header = parse_header(raw_header, ";")
68
+ @nodes = parse_nodes(raw_tables)
69
+ @connections = parse_connections(raw_connections)
70
+ end
71
+
72
+ def parse_header(raw_header, delimiter)
73
+ temp = {}
74
+ raw_header.scan(/(\w+)(?:\s*=?\s*)(?:["|\[](.+?)["|\]]#{delimiter})/m).each do |n|
75
+ if n[1].include?("=")
76
+ temp[n[0]] = parse_header(n[1], ",")
77
+ else
78
+ temp[n[0]] = n[1].strip
79
+ end
80
+ end
81
+ return temp
82
+ end
83
+
84
+ def chop_tables(raw_tables)
85
+ {}.tap do |output|
86
+ raw_tables.scan(/\s*\"*([\w:]+)\"*\s*\[\w+\s*=\s*<(.+?)>\];/m).each do |n|
87
+ output[ n[0].gsub('\"', '').strip ] = n[1].strip
88
+ end
89
+ end
90
+ end
91
+
92
+ def parse_nodes(raw_tables)
93
+ result = []
94
+ chop_tables(raw_tables).each do |name, table|
95
+ node = {:name => name.sub("m_", "")}
96
+ node[:fields] = []
97
+ if table.include?("|")
98
+ table.split("|")[1].scan(/port="([\w:]+)">[^<]+<[^>]+>(.+?)</m).each do |pair|
99
+ node[:fields] << { :name => pair[0], :type => pair[1] }
100
+ end
101
+ end
102
+ result << node
103
+ end
104
+ result
105
+ end
106
+
107
+ def parse_connections(node_chunk)
108
+ output = []
109
+ node_chunk.split("\n").each do |this_line|
110
+ this_connection = {}
111
+ temp = this_line.split("->")
112
+ this_connection[:start_node] = temp[0].gsub('"', '').gsub('\\', '').sub('m_', '').strip
113
+ this_connection[:end_node] = temp[1].split("[")[0].gsub('"', '').gsub('\\', '').sub('m_', '').strip
114
+ tokens = temp[1].split("[")[1].split("]")[0].split(",")
115
+ tokens.each do |token_string|
116
+ token_pair = token_string.split("=")
117
+ this_connection[token_pair[0].strip.gsub('"', '').gsub('\\', '').to_sym] = token_pair[1].strip.gsub('"', '').gsub('\\', '')
118
+ end
119
+ output << this_connection
120
+ end
121
+ return output
122
+ end
123
+ end # end DotFile class
124
+
125
+ end # end Cabbage module
@@ -0,0 +1,148 @@
1
+ require_relative "parser"
2
+ require_relative "mime_part"
3
+ module Cabbage
4
+ class Email
5
+ include EmailParser
6
+
7
+ def initialize(source = "")
8
+ @raw_source = ""
9
+ @raw_parsed = {}
10
+ @header = {}
11
+ @original_keys = {}
12
+ @parts = []
13
+ @multipart = false
14
+ if source != nil && source.class.to_s == "String"
15
+ if source.include?("\n")
16
+ @raw_source = source
17
+ parse()
18
+ else
19
+ load_and_parse(source)
20
+ end
21
+ else
22
+ raise "Bad input to Cabbage::Email#new."
23
+ end
24
+ end
25
+
26
+ attr_reader :raw_source, :raw_parsed, :header, :original_keys, :parts, :multipart
27
+
28
+ def method_missing(m, *args, &block)
29
+ if @header.keys.include?(m.intern)
30
+ @header[m.intern]
31
+ else
32
+ raise "undefined method in Cabbage::Email"
33
+ end
34
+ end
35
+
36
+ def load(filename)
37
+ open(filename) {|f| @raw_source = f.read }
38
+ end
39
+
40
+ def load_and_parse(filename)
41
+ open(filename) {|f| @raw_source = f.read }
42
+ parse()
43
+ end
44
+
45
+ def parse
46
+ if @raw_source.empty?
47
+ puts "Nothing to parse."
48
+ return false
49
+ else
50
+ @raw_parsed = EmailParser.parse_email(@raw_source)
51
+ end
52
+ @header = @raw_parsed[:header]
53
+ @original_keys = @raw_parsed[:original_keys]
54
+ if header[:content_type].include?("multipart")
55
+ @multipart = true
56
+ make_flat(@raw_parsed).each do |raw_part|
57
+ @parts << MimePart.new(raw_part)
58
+ end
59
+ else
60
+ make_flat(@raw_parsed).each do |raw_part|
61
+ @parts << MimePart.new(raw_part)
62
+ end
63
+ end
64
+ return true
65
+ end
66
+
67
+ def multipart?
68
+ @multipart
69
+ end
70
+
71
+ def [](key)
72
+ if @header.has_key?(key)
73
+ @header[key]
74
+ elsif key == :body
75
+ self.body
76
+ elsif self.mime_types.include?(key)
77
+ @parts[self.mime_types.index(key)].body
78
+ elsif key.class == FixNum
79
+
80
+ else
81
+ nil
82
+ end
83
+ end
84
+
85
+ def keys
86
+ @header.keys + [:body] + self.mime_types
87
+ end
88
+
89
+ def attachments
90
+ [].tap do |output|
91
+ @parts.each do |part|
92
+ output << part if part.attachment?
93
+ end
94
+ end
95
+ end
96
+
97
+ # returns an array of strings representing available
98
+ # mime types in the array of mime parts.
99
+ def mime_types
100
+ [].tap do |result|
101
+ @parts.each do |part|
102
+ result << part.content_type
103
+ end
104
+ end
105
+ end
106
+
107
+ def body(type = "text/plain")
108
+ if @multipart
109
+ if self.mime_types.include?(type)
110
+ @parts[self.mime_types.index(type)].body
111
+ elsif @parts.size > 0
112
+ @parts[0].body
113
+ else
114
+ nil
115
+ end
116
+ else
117
+ @raw_parsed[:body]
118
+ end
119
+ end
120
+
121
+ #####################################################################
122
+ ########################## PRIVATE METHODS ##########################
123
+ #####################################################################
124
+
125
+ private
126
+
127
+ def make_flat(tree)
128
+ results = []
129
+
130
+ if tree[:body].class == Hash
131
+ tree[:body].each do |this_part|
132
+ if this_part[:header][:content_type] =~ /^multipart/
133
+ results << make_flat(this_part)
134
+ else
135
+ results << this_part
136
+ end
137
+ end
138
+ elsif tree[:body].class == String
139
+ results << tree
140
+ else
141
+ # something went wrong
142
+ end
143
+ return results.flatten
144
+ end
145
+
146
+ end # end Email class
147
+
148
+ end # end Cabbage module
@@ -0,0 +1,38 @@
1
+ module Cabbage
2
+ class MimePart
3
+
4
+ def initialize(raw_part)
5
+ @raw_source = raw_part
6
+ @header = raw_part[:header]
7
+ @original_keys = raw_part[:original_keys]
8
+ @body = raw_part[:body]
9
+ @content_type = @header[:content_type].split(";")[0].strip
10
+ end
11
+ attr_accessor :raw_source, :header, :original_keys, :body, :content_type
12
+
13
+ def method_missing(m, *args, &block)
14
+ if @header.keys.include?(m.intern)
15
+ @header[m.intern]
16
+ else
17
+ raise "undefined method in Cabbage::MimePart"
18
+ end
19
+ end
20
+
21
+ def [](key)
22
+ @header[key]
23
+ end
24
+
25
+ def keys
26
+ @header.keys
27
+ end
28
+
29
+ def attachment?
30
+ if @header.has_key?(:content_disposition) && @header[:content_disposition].start_with?("attachment")
31
+ true
32
+ else
33
+ false
34
+ end
35
+ end
36
+
37
+ end
38
+ end
@@ -0,0 +1,47 @@
1
+ module Cabbage
2
+
3
+ module EmailParser
4
+
5
+ require_relative "../string_extras"
6
+
7
+ def EmailParser.parse_email(source)
8
+ return {} if source.empty?
9
+ results = { :header => {}, :original_keys => {}, :body => [] }
10
+ divided = source.strip.match(/\A\s*(.+?)(?:\r\n\r\n|\n\n)(.+)/m)
11
+ raw_header = divided[1]
12
+ raw_body = divided[2]
13
+ raw_header.gsub(/\n\s+/, " ").split("\n").each do |this_line|
14
+ this_pair = this_line.split(/:\s*/, 2)
15
+ this_key_string = this_pair[0].strip
16
+ this_key_symbol = this_key_string.down_under.intern
17
+ this_value = this_pair[1]
18
+ if results[:header].has_key?(this_key_symbol)
19
+ unless results[:header][this_key_symbol].class == Array
20
+ results[:header][this_key_symbol] = [results[:header][this_key_symbol]]
21
+ end
22
+ results[:header][this_key_symbol] << this_value
23
+ else
24
+ results[:header][this_key_symbol] = this_value.strip
25
+ results[:original_keys][this_key_symbol] = this_key_string
26
+ end
27
+ end
28
+ if results[:header][:content_type].start_with?("multipart")
29
+ boundary = EmailParser.extract_value(results[:header][:content_type], "boundary")
30
+ EmailParser.break_by_boundary(raw_body, boundary).each {|n| results[:body] << EmailParser.parse_email(n)}
31
+ else
32
+ results[:body] = raw_body
33
+ end
34
+ results
35
+ end
36
+
37
+ def EmailParser.break_by_boundary(source, boundary)
38
+ source.split("--" + boundary).keep_if {|n| n != "--" && n != ""}.each {|n| n.strip!}
39
+ end
40
+
41
+ def EmailParser.extract_value(target_string, key)
42
+ target_string.match(/#{key}=["']?([^;\s"']+)/i)[1]
43
+ end
44
+
45
+ end # end Parser module
46
+
47
+ end
@@ -0,0 +1,18 @@
1
+ # a couple of extra methods for strings, to make things easier
2
+
3
+ class String
4
+
5
+ # if a blank? method isn't yet defined for strings, do so
6
+ # here. If run within rails, this should already be
7
+ # defined. (via ActiveSupport)
8
+ if !(String.public_method_defined? :blank?)
9
+ def blank?
10
+ self !~ /\S/
11
+ end
12
+ end
13
+
14
+ def down_under
15
+ self.gsub("-", "_").downcase
16
+ end
17
+
18
+ end # end String class additions
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cabbage
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.2.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-09-24 00:00:00.000000000Z
12
+ date: 2011-09-26 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: shoulda
16
- requirement: &15137480 !ruby/object:Gem::Requirement
16
+ requirement: &12429820 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *15137480
24
+ version_requirements: *12429820
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: bundler
27
- requirement: &15135640 !ruby/object:Gem::Requirement
27
+ requirement: &12425540 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: 1.0.0
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *15135640
35
+ version_requirements: *12425540
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: jeweler
38
- requirement: &15133500 !ruby/object:Gem::Requirement
38
+ requirement: &12420200 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: 1.6.4
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *15133500
46
+ version_requirements: *12420200
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: rcov
49
- requirement: &15128820 !ruby/object:Gem::Requirement
49
+ requirement: &12400380 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,7 +54,7 @@ dependencies:
54
54
  version: '0'
55
55
  type: :development
56
56
  prerelease: false
57
- version_requirements: *15128820
57
+ version_requirements: *12400380
58
58
  description: More to come.
59
59
  email: josh.lauer75@gmail.com
60
60
  executables: []
@@ -72,6 +72,11 @@ files:
72
72
  - VERSION
73
73
  - cabbage.gemspec
74
74
  - lib/cabbage.rb
75
+ - lib/cabbage/dotfile/parser.rb
76
+ - lib/cabbage/email/email.rb
77
+ - lib/cabbage/email/mime_part.rb
78
+ - lib/cabbage/email/parser.rb
79
+ - lib/cabbage/string_extras.rb
75
80
  - test/helper.rb
76
81
  - test/test_cabbage.rb
77
82
  homepage: http://github.com/josh-lauer/cabbage
@@ -89,7 +94,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
89
94
  version: '0'
90
95
  segments:
91
96
  - 0
92
- hash: 1830193836739110796
97
+ hash: 634790544501283345
93
98
  required_rubygems_version: !ruby/object:Gem::Requirement
94
99
  none: false
95
100
  requirements: