colander 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
data/.rvmrc ADDED
@@ -0,0 +1 @@
1
+ rvm --create ruby-1.8.7-p302@email_extractor
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Fetch gems over HTTPS so that downloads cannot be tampered with in transit.
source "https://rubygems.org"

# Specify your gem's dependencies in colander.gemspec
gemspec
data/README.md ADDED
@@ -0,0 +1,15 @@
1
+ Colander
2
+ ==============
3
+ Colander is a gem to collect emails from various file formats.
4
+
5
+ Supported formats
6
+ -----------------
7
+ * "Old" Ms Excel files, ending in .xls
8
+ * Ms Excel files ending with .xlsx
9
+
10
+ Usage
11
+ -----
12
+
13
+ parser = Colander.parse("/path/to/file.xls")
14
+ parser.emails # => ["foo@bar.com"]
15
+
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ require 'bundler'
2
+ Bundler::GemHelper.install_tasks
data/colander.gemspec ADDED
@@ -0,0 +1,26 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "colander/version"
4
+
5
# Packaging metadata for the colander gem.
Gem::Specification.new do |s|
  s.name        = "colander"
  s.version     = Colander::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Kristian Hellquist", "Jonas Forsberg"]
  s.email       = ["dev@mynewsdesk.com"]
  s.homepage    = "http://devcorner.mynewsdesk.com"
  s.summary     = %q{Extract an array of emails from various file formats}
  s.description = %q{Colander is a gem to collect emails from various file formats (currently .xls and .xlsx spreadsheets).}

  # s.rubyforge_project = "mnd_colander"

  # The packaged file lists are taken from what git tracks, so the gem must
  # be built from inside a git checkout.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

  # Runtime dependencies.
  s.add_dependency "nokogiri", "~> 1.4.4"
  s.add_dependency "roo", "~> 1.9.5"
  s.add_dependency "zip", "~> 2.0.2"

  # Only needed to run the specs.
  s.add_development_dependency "rspec"
end
@@ -0,0 +1,4 @@
1
module Colander
  # Error type for files Colander cannot handle. It is a plain
  # StandardError subclass with no extra behaviour.
  InvalidFile = Class.new(StandardError)
end
@@ -0,0 +1,15 @@
1
module Colander
  module Parser
    # Abstract parent for the concrete parsers. It remembers the path of the
    # file to read and exposes the collected addresses through #emails.
    class Base
      # Addresses collected so far; an empty array until a subclass's #parse
      # has stored its result.
      attr_reader :emails

      # file_path - location of the file that #parse should read.
      def initialize(file_path)
        @file_path = file_path
        # Start with an empty list so #emails always answers with an array,
        # even when it is read before #parse has run.
        @emails = []
      end

      # Subclasses must override this and assign their result to @emails.
      #
      # Raises RuntimeError when called on a class that did not override it.
      def parse
        raise "#{self.class.name} must implement #parse"
      end
    end
  end
end
@@ -0,0 +1,14 @@
1
+ require 'colander/parser/base'
2
+
3
module Colander
  module Parser
    # Do-nothing parser: parsing is a no-op and the list of e-mails is
    # always empty.
    class Null
      # Intentionally does nothing and returns nil.
      def parse; end

      # Returns a fresh empty array on every call.
      def emails; []; end
    end
  end
end
14
+
@@ -0,0 +1,30 @@
1
+ require 'colander/invalid_file'
2
+ require 'colander/parser/base'
3
+ require 'roo'
4
+
5
module Colander
  module Parser
    # Collects e-mail addresses from an Excel workbook opened with roo's
    # Excel class. Subclasses can support other formats by overriding
    # #parse_file.
    class Xls < Base
      # Pattern used to pick addresses out of the serialized sheet contents.
      # The top-level domain has no upper length bound, so addresses under
      # TLDs longer than four letters (e.g. ".museum") are not dropped.
      EMAIL_PATTERN = /\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}\b/

      # Opens the workbook and stores every address found in it in @emails.
      #
      # Returns the array of addresses.
      # Raises Colander::InvalidFile when opening or reading the workbook
      # fails with a TypeError or IOError.
      def parse
        spreadsheet = parse_file
        @emails = collect_emails(spreadsheet)
      rescue TypeError, IOError => e
        # Keep the path and the underlying reason so callers can tell which
        # file was rejected and why.
        raise InvalidFile, "#{@file_path}: #{e.message}"
      end

      protected

      # Opens @file_path as a workbook.
      # NOTE(review): the :ignore argument presumably tells roo not to
      # complain about the file extension - confirm against the roo docs.
      def parse_file
        Excel.new(@file_path, nil, :ignore)
      end

      # Scans every sheet of the workbook for addresses.
      #
      # spreadsheet - object responding to #sheets, #default_sheet= and
      #               #to_yaml.
      #
      # Returns a flat array of matches, in sheet order, duplicates included.
      def collect_emails(spreadsheet)
        spreadsheet.sheets.map do |sheet|
          # #to_yaml serializes the currently selected sheet, so select each
          # sheet in turn before scanning its text.
          spreadsheet.default_sheet = sheet
          spreadsheet.to_yaml.scan(EMAIL_PATTERN)
        end.flatten
      end
    end
  end
end
@@ -0,0 +1,13 @@
1
require 'colander/parser/base'
# Xlsx subclasses Xls, so that file must be loaded first; without this
# require, loading this file on its own fails with a NameError.
require 'colander/parser/xls'
require 'roo'

module Colander
  module Parser
    # Same behaviour as Xls, except that the workbook is opened with roo's
    # Excelx class.
    class Xlsx < Xls
      protected

      # Opens @file_path as a workbook using Excelx.
      def parse_file
        Excelx.new(@file_path, nil, :ignore)
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module Colander
  # Version string of the gem; the gemspec reads it as Colander::VERSION.
  VERSION = '0.0.1'
end
data/lib/colander.rb ADDED
@@ -0,0 +1,18 @@
1
+ require 'colander/invalid_file'
2
+ require 'colander/parser/xls'
3
+ require 'colander/parser/xlsx'
4
+
5
module Colander
  # Picks a parser from the file extension, runs it and returns it.
  #
  # file_path - location of the file to read.
  # file_name - optional name to take the extension from instead of
  #             file_path (useful when the file on disk is stored under a
  #             name without a meaningful extension).
  #
  # The extension is compared case-insensitively, so "REPORT.XLS" is
  # handled like "report.xls".
  #
  # Returns the parser instance after its #parse has been called.
  # Raises Colander::InvalidFile when the extension is not .xls or .xlsx.
  def self.parse(file_path, file_name = nil)
    # to_s lets a nil path fall through to InvalidFile instead of blowing
    # up with an unrelated error.
    extension = File.extname((file_name || file_path).to_s).downcase

    parser_class =
      case extension
      when ".xls"  then Parser::Xls
      when ".xlsx" then Parser::Xlsx
      else raise InvalidFile, "unsupported file type: #{extension.inspect}"
      end

    parser = parser_class.new(file_path)
    parser.parse
    parser
  end
end
data/spec/base_spec.rb ADDED
@@ -0,0 +1,10 @@
1
+ require 'spec_helper'
2
+
3
describe Colander::Parser::Base do
  describe "#emails" do
    # Only checks that the reader exists; #parse is stubbed on every instance.
    it "should responses to it" do
      described_class.any_instance.stub(:parse)
      parser = described_class.new("foo/bar")
      parser.should respond_to(:emails)
    end
  end
end
@@ -0,0 +1,45 @@
1
+ require 'spec_helper'
2
+
3
describe Colander do
  describe ".parse" do
    # --- argument and extension handling ---------------------------------

    it "should raise error if no file path is passed" do
      call_without_arguments = lambda { Colander.parse }
      call_without_arguments.should raise_exception(ArgumentError)
    end

    it "should raise error if file is unsupported" do
      call_with_unknown_extension = lambda { Colander.parse("foo.bar") }
      call_with_unknown_extension.should raise_error(Colander::InvalidFile)
    end

    it "should not raise error if a file path is passed" do
      call_with_path = lambda { Colander.parse("/file/path") }
      call_with_path.should_not raise_exception(ArgumentError)
    end

    # --- parser selection (the parsers' #parse is stubbed out) ------------

    it "accepts an option file name and returns correct parser" do
      Colander::Parser::Xls.any_instance.stub(:parse)
      parser = Colander.parse("/file/path", "apa.xls")
      parser.should be_a(Colander::Parser::Xls)
    end

    it "should recognize a xls file and return correct parser" do
      Colander::Parser::Xls.any_instance.stub(:parse)
      parser = Colander.parse("/foo/apa.xls")
      parser.should be_a(Colander::Parser::Xls)
    end

    it "should recognize a xlsx file and return correct parser" do
      Colander::Parser::Xlsx.any_instance.stub(:parse)
      parser = Colander.parse("/foo/apa.xlsx")
      parser.should be_a(Colander::Parser::Xlsx)
    end

    it "should invoke parse method on the parser" do
      Colander::Parser::Xlsx.any_instance.should_receive(:parse)
      Colander.parse("/foo/bar.xlsx")
    end
  end
end
Binary file
Binary file
Binary file
@@ -0,0 +1,3 @@
1
# Put the gem's lib/ directory on the load path. expand_path resolves the
# relative part against the path of this file itself (not its directory),
# so two ".." segments are needed: one to leave the file name and one to
# leave spec/.
$:.push File.expand_path("../../lib", __FILE__)

require 'colander'
data/spec/xls_spec.rb ADDED
@@ -0,0 +1,36 @@
1
+ require 'spec_helper'
2
+
3
describe Colander::Parser::Xls do
  describe "#parse" do
    # Addresses every fixture workbook is expected to yield, in order.
    fixture_emails = [
      "markus.nordin@mynewsdesk.com",
      "markus@hej.se",
      "sven@bertil.se",
      "Adam.A@hotmail.com",
      "apa@elabs.se",
      "liam@neeson.net",
      "david@mynewsdesk.com"
    ]

    it "stores found emails" do
      Excel.stub(:new)
      parser = Colander::Parser::Xls.new("file/path")
      parser.should_receive(:collect_emails).and_return(["bruce@wayne.com"])
      parser.parse
      parser.emails.sort.should eql(["bruce@wayne.com"])
    end

    it "raises exception if file is invalid" do
      # This spec file itself serves as the "not a spreadsheet" input.
      parser = Colander::Parser::Xls.new("./#{__FILE__}")
      parsing = lambda { parser.parse }
      parsing.should raise_error(Colander::InvalidFile)
    end

    it "retreives emails from an 95-excel spreadsheet" do
      parser = Colander::Parser::Xls.new("spec/fixtures/excel95.xls")
      parser.parse
      parser.emails.should eql(fixture_emails)
    end

    it "retreives emails from an xls spreadsheet" do
      parser = Colander::Parser::Xls.new("spec/fixtures/old-format.xls")
      parser.parse
      parser.emails.should eql(fixture_emails)
    end

    it "retreives emails from an 95-excel spreadsheet without file suffix" do
      parser = Colander::Parser::Xls.new("spec/fixtures/excel95-without-file-suffix")
      parser.parse
      parser.emails.should eql(fixture_emails)
    end
  end
end
data/spec/xlsx_spec.rb ADDED
@@ -0,0 +1,14 @@
1
+ require 'spec_helper'
2
describe Colander::Parser::Xlsx do
  it "is subclass of Colander::Parser::Xls" do
    parser = described_class.new("foo/bar")
    parser.should be_a(Colander::Parser::Xls)
  end

  describe "#parse" do
    it "retreives emails from an xlsx spreadsheet" do
      # Addresses contained in the fixture workbook, in the order they are found.
      expected = [
        "markus.nordin@mynewsdesk.com",
        "markus@hej.se",
        "sven@bertil.se",
        "Adam.A@hotmail.com",
        "apa@elabs.se",
        "liam@neeson.net",
        "david@mynewsdesk.com"
      ]

      parser = described_class.new("spec/fixtures/new-format.xlsx")
      parser.parse
      parser.emails.should eql(expected)
    end
  end
end
metadata ADDED
@@ -0,0 +1,157 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: colander
3
+ version: !ruby/object:Gem::Version
4
+ hash: 29
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 1
10
+ version: 0.0.1
11
+ platform: ruby
12
+ authors:
13
+ - Kristian Hellquist
14
+ - Jonas Forsberg
15
+ autorequire:
16
+ bindir: bin
17
+ cert_chain: []
18
+
19
+ date: 2011-08-29 00:00:00 Z
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: nokogiri
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ hash: 15
30
+ segments:
31
+ - 1
32
+ - 4
33
+ - 4
34
+ version: 1.4.4
35
+ type: :runtime
36
+ version_requirements: *id001
37
+ - !ruby/object:Gem::Dependency
38
+ name: roo
39
+ prerelease: false
40
+ requirement: &id002 !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ hash: 57
46
+ segments:
47
+ - 1
48
+ - 9
49
+ - 5
50
+ version: 1.9.5
51
+ type: :runtime
52
+ version_requirements: *id002
53
+ - !ruby/object:Gem::Dependency
54
+ name: zip
55
+ prerelease: false
56
+ requirement: &id003 !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ hash: 11
62
+ segments:
63
+ - 2
64
+ - 0
65
+ - 2
66
+ version: 2.0.2
67
+ type: :runtime
68
+ version_requirements: *id003
69
+ - !ruby/object:Gem::Dependency
70
+ name: rspec
71
+ prerelease: false
72
+ requirement: &id004 !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ">="
76
+ - !ruby/object:Gem::Version
77
+ hash: 3
78
+ segments:
79
+ - 0
80
+ version: "0"
81
+ type: :development
82
+ version_requirements: *id004
83
+ description: See summary. lol
84
+ email:
85
+ - dev@mynewsdesk.com
86
+ executables: []
87
+
88
+ extensions: []
89
+
90
+ extra_rdoc_files: []
91
+
92
+ files:
93
+ - .gitignore
94
+ - .rvmrc
95
+ - Gemfile
96
+ - README.md
97
+ - Rakefile
98
+ - colander.gemspec
99
+ - lib/colander.rb
100
+ - lib/colander/invalid_file.rb
101
+ - lib/colander/parser/base.rb
102
+ - lib/colander/parser/null.rb
103
+ - lib/colander/parser/xls.rb
104
+ - lib/colander/parser/xlsx.rb
105
+ - lib/colander/version.rb
106
+ - spec/base_spec.rb
107
+ - spec/colander_spec.rb
108
+ - spec/fixtures/excel95-without-file-suffix
109
+ - spec/fixtures/excel95.xls
110
+ - spec/fixtures/new-format.xlsx
111
+ - spec/fixtures/old-format.xls
112
+ - spec/spec_helper.rb
113
+ - spec/xls_spec.rb
114
+ - spec/xlsx_spec.rb
115
+ homepage: http://devcorner.mynewsdesk.com
116
+ licenses: []
117
+
118
+ post_install_message:
119
+ rdoc_options: []
120
+
121
+ require_paths:
122
+ - lib
123
+ required_ruby_version: !ruby/object:Gem::Requirement
124
+ none: false
125
+ requirements:
126
+ - - ">="
127
+ - !ruby/object:Gem::Version
128
+ hash: 3
129
+ segments:
130
+ - 0
131
+ version: "0"
132
+ required_rubygems_version: !ruby/object:Gem::Requirement
133
+ none: false
134
+ requirements:
135
+ - - ">="
136
+ - !ruby/object:Gem::Version
137
+ hash: 3
138
+ segments:
139
+ - 0
140
+ version: "0"
141
+ requirements: []
142
+
143
+ rubyforge_project:
144
+ rubygems_version: 1.8.10
145
+ signing_key:
146
+ specification_version: 3
147
+ summary: Exctract an array of emails from various file formats
148
+ test_files:
149
+ - spec/base_spec.rb
150
+ - spec/colander_spec.rb
151
+ - spec/fixtures/excel95-without-file-suffix
152
+ - spec/fixtures/excel95.xls
153
+ - spec/fixtures/new-format.xlsx
154
+ - spec/fixtures/old-format.xls
155
+ - spec/spec_helper.rb
156
+ - spec/xls_spec.rb
157
+ - spec/xlsx_spec.rb