colander 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +4 -0
- data/.rvmrc +1 -0
- data/Gemfile +4 -0
- data/README.md +15 -0
- data/Rakefile +2 -0
- data/colander.gemspec +26 -0
- data/lib/colander/invalid_file.rb +4 -0
- data/lib/colander/parser/base.rb +15 -0
- data/lib/colander/parser/null.rb +14 -0
- data/lib/colander/parser/xls.rb +30 -0
- data/lib/colander/parser/xlsx.rb +13 -0
- data/lib/colander/version.rb +3 -0
- data/lib/colander.rb +18 -0
- data/spec/base_spec.rb +10 -0
- data/spec/colander_spec.rb +45 -0
- data/spec/fixtures/excel95-without-file-suffix +0 -0
- data/spec/fixtures/excel95.xls +0 -0
- data/spec/fixtures/new-format.xlsx +0 -0
- data/spec/fixtures/old-format.xls +0 -0
- data/spec/spec_helper.rb +3 -0
- data/spec/xls_spec.rb +36 -0
- data/spec/xlsx_spec.rb +14 -0
- metadata +157 -0
data/.gitignore
ADDED
data/.rvmrc
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
rvm --create ruby-1.8.7-p302@email_extractor
|
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
Colander
|
2
|
+
==============
|
3
|
+
Colander is a gem to collect emails from various file formats.
|
4
|
+
|
5
|
+
Supported formats
|
6
|
+
|
7
|
+
* "Old" Ms Excel files, ending in .xls
|
8
|
+
* Ms Excel files ending with .xlsx
|
9
|
+
|
10
|
+
Usage
|
11
|
+
-----
|
12
|
+
|
13
|
+
parser = Colander.parse("/path/to/file.xls")
|
14
|
+
parser.emails # => ["foo@bar.com"]
|
15
|
+
|
data/Rakefile
ADDED
data/colander.gemspec
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "colander/version"
|
4
|
+
|
5
|
+
Gem::Specification.new do |s|
|
6
|
+
s.name = "colander"
|
7
|
+
s.version = Colander::VERSION
|
8
|
+
s.platform = Gem::Platform::RUBY
|
9
|
+
s.authors = ["Kristian Hellquist", "Jonas Forsberg"]
|
10
|
+
s.email = ["dev@mynewsdesk.com"]
|
11
|
+
s.homepage = "http://devcorner.mynewsdesk.com"
|
12
|
+
s.summary = %q{Exctract an array of emails from various file formats}
|
13
|
+
s.description = %q{See summary. lol}
|
14
|
+
|
15
|
+
# s.rubyforge_project = "mnd_colander"
|
16
|
+
|
17
|
+
s.files = `git ls-files`.split("\n")
|
18
|
+
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
19
|
+
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
20
|
+
s.require_paths = ["lib"]
|
21
|
+
|
22
|
+
s.add_dependency "nokogiri", "~> 1.4.4"
|
23
|
+
s.add_dependency "roo", "~> 1.9.5"
|
24
|
+
s.add_dependency "zip", "~> 2.0.2"
|
25
|
+
s.add_development_dependency "rspec"
|
26
|
+
end
|
data/lib/colander/parser/xls.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'colander/invalid_file'
|
2
|
+
require 'colander/parser/base'
|
3
|
+
require 'roo'
|
4
|
+
|
5
|
+
module Colander
|
6
|
+
module Parser
|
7
|
+
class Xls < Base
|
8
|
+
def parse
|
9
|
+
spreadsheet = parse_file
|
10
|
+
@emails = collect_emails spreadsheet
|
11
|
+
rescue TypeError, IOError
|
12
|
+
raise InvalidFile
|
13
|
+
end
|
14
|
+
|
15
|
+
protected
|
16
|
+
|
17
|
+
def parse_file
|
18
|
+
Excel.new(@file_path,nil,:ignore)
|
19
|
+
end
|
20
|
+
|
21
|
+
def collect_emails(spreadsheet)
|
22
|
+
spreadsheet.sheets.map do |sheet|
|
23
|
+
spreadsheet.default_sheet = sheet
|
24
|
+
spreadsheet.to_yaml.scan(/\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,4}\b/)
|
25
|
+
end.flatten
|
26
|
+
end
|
27
|
+
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
data/lib/colander.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'colander/invalid_file'
|
2
|
+
require 'colander/parser/xls'
|
3
|
+
require 'colander/parser/xlsx'
|
4
|
+
|
5
|
+
module Colander
|
6
|
+
def self.parse(file_path, file_name = nil)
|
7
|
+
parser = case (file_name || file_path).split(".").last
|
8
|
+
when "xls"
|
9
|
+
Parser::Xls.new(file_path)
|
10
|
+
when "xlsx"
|
11
|
+
Parser::Xlsx.new(file_path)
|
12
|
+
else
|
13
|
+
raise InvalidFile
|
14
|
+
end
|
15
|
+
parser.parse
|
16
|
+
parser
|
17
|
+
end
|
18
|
+
end
|
data/spec/base_spec.rb
ADDED
data/spec/colander_spec.rb
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Colander do
|
4
|
+
describe ".parse" do
|
5
|
+
it "should raise error if no file path is passed" do
|
6
|
+
lambda{
|
7
|
+
Colander.parse
|
8
|
+
}.should raise_exception(ArgumentError)
|
9
|
+
end
|
10
|
+
|
11
|
+
it "should raise error if file is unsupported" do
|
12
|
+
lambda {
|
13
|
+
Colander.parse("foo.bar")
|
14
|
+
}.should raise_error(Colander::InvalidFile)
|
15
|
+
end
|
16
|
+
|
17
|
+
it "should not raise error if a file path is passed" do
|
18
|
+
lambda{
|
19
|
+
Colander.parse("/file/path")
|
20
|
+
}.should_not raise_exception(ArgumentError)
|
21
|
+
end
|
22
|
+
|
23
|
+
it "accepts an option file name and returns correct parser" do
|
24
|
+
Colander::Parser::Xls.any_instance.stub(:parse)
|
25
|
+
Colander.parse("/file/path", "apa.xls").should be_a Colander::Parser::Xls
|
26
|
+
end
|
27
|
+
|
28
|
+
it "should recognize a xls file and return correct parser" do
|
29
|
+
path = "/foo/apa.xls"
|
30
|
+
Colander::Parser::Xls.any_instance.stub(:parse)
|
31
|
+
Colander.parse(path).should be_a Colander::Parser::Xls
|
32
|
+
end
|
33
|
+
|
34
|
+
it "should recognize a xlsx file and return correct parser" do
|
35
|
+
path = "/foo/apa.xlsx"
|
36
|
+
Colander::Parser::Xlsx.any_instance.stub(:parse)
|
37
|
+
Colander.parse(path).should be_a Colander::Parser::Xlsx
|
38
|
+
end
|
39
|
+
|
40
|
+
it "should invoke parse method on the parser" do
|
41
|
+
Colander::Parser::Xlsx.any_instance.should_receive(:parse)
|
42
|
+
Colander.parse("/foo/bar.xlsx")
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
data/spec/spec_helper.rb
ADDED
data/spec/xls_spec.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Colander::Parser::Xls do
|
4
|
+
describe "#parse" do
|
5
|
+
it "stores found emails" do
|
6
|
+
Excel.stub(:new)
|
7
|
+
parser = Colander::Parser::Xls.new("file/path")
|
8
|
+
parser.should_receive(:collect_emails).and_return(["bruce@wayne.com"])
|
9
|
+
parser.parse
|
10
|
+
parser.emails.sort.should eql(["bruce@wayne.com"])
|
11
|
+
end
|
12
|
+
|
13
|
+
it "raises exception if file is invalid" do
|
14
|
+
parser = Colander::Parser::Xls.new("./#{__FILE__}")
|
15
|
+
lambda {
|
16
|
+
parser.parse
|
17
|
+
}.should raise_error(Colander::InvalidFile)
|
18
|
+
end
|
19
|
+
|
20
|
+
it "retreives emails from an 95-excel spreadsheet" do
|
21
|
+
parser = Colander::Parser::Xls.new("spec/fixtures/excel95.xls")
|
22
|
+
parser.parse
|
23
|
+
parser.emails.should eql(["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"])
|
24
|
+
end
|
25
|
+
it "retreives emails from an xls spreadsheet" do
|
26
|
+
parser = Colander::Parser::Xls.new("spec/fixtures/old-format.xls")
|
27
|
+
parser.parse
|
28
|
+
parser.emails.should eql(["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"])
|
29
|
+
end
|
30
|
+
it "retreives emails from an 95-excel spreadsheet without file suffix" do
|
31
|
+
parser = Colander::Parser::Xls.new("spec/fixtures/excel95-without-file-suffix")
|
32
|
+
parser.parse
|
33
|
+
parser.emails.should eql(["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"])
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
data/spec/xlsx_spec.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
describe Colander::Parser::Xlsx do
|
3
|
+
it "is subclass of Colander::Parser::Xls" do
|
4
|
+
Colander::Parser::Xlsx.new("foo/bar").should be_a Colander::Parser::Xls
|
5
|
+
end
|
6
|
+
|
7
|
+
describe "#parse" do
|
8
|
+
it "retreives emails from an xlsx spreadsheet" do
|
9
|
+
parser = Colander::Parser::Xlsx.new("spec/fixtures/new-format.xlsx")
|
10
|
+
parser.parse
|
11
|
+
parser.emails.should eql(["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"])
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
metadata
ADDED
@@ -0,0 +1,157 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: colander
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 29
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
- 1
|
10
|
+
version: 0.0.1
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Kristian Hellquist
|
14
|
+
- Jonas Forsberg
|
15
|
+
autorequire:
|
16
|
+
bindir: bin
|
17
|
+
cert_chain: []
|
18
|
+
|
19
|
+
date: 2011-08-29 00:00:00 Z
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
name: nokogiri
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ~>
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 15
|
30
|
+
segments:
|
31
|
+
- 1
|
32
|
+
- 4
|
33
|
+
- 4
|
34
|
+
version: 1.4.4
|
35
|
+
type: :runtime
|
36
|
+
version_requirements: *id001
|
37
|
+
- !ruby/object:Gem::Dependency
|
38
|
+
name: roo
|
39
|
+
prerelease: false
|
40
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ~>
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
hash: 57
|
46
|
+
segments:
|
47
|
+
- 1
|
48
|
+
- 9
|
49
|
+
- 5
|
50
|
+
version: 1.9.5
|
51
|
+
type: :runtime
|
52
|
+
version_requirements: *id002
|
53
|
+
- !ruby/object:Gem::Dependency
|
54
|
+
name: zip
|
55
|
+
prerelease: false
|
56
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ~>
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
hash: 11
|
62
|
+
segments:
|
63
|
+
- 2
|
64
|
+
- 0
|
65
|
+
- 2
|
66
|
+
version: 2.0.2
|
67
|
+
type: :runtime
|
68
|
+
version_requirements: *id003
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rspec
|
71
|
+
prerelease: false
|
72
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ">="
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
hash: 3
|
78
|
+
segments:
|
79
|
+
- 0
|
80
|
+
version: "0"
|
81
|
+
type: :development
|
82
|
+
version_requirements: *id004
|
83
|
+
description: See summary. lol
|
84
|
+
email:
|
85
|
+
- dev@mynewsdesk.com
|
86
|
+
executables: []
|
87
|
+
|
88
|
+
extensions: []
|
89
|
+
|
90
|
+
extra_rdoc_files: []
|
91
|
+
|
92
|
+
files:
|
93
|
+
- .gitignore
|
94
|
+
- .rvmrc
|
95
|
+
- Gemfile
|
96
|
+
- README.md
|
97
|
+
- Rakefile
|
98
|
+
- colander.gemspec
|
99
|
+
- lib/colander.rb
|
100
|
+
- lib/colander/invalid_file.rb
|
101
|
+
- lib/colander/parser/base.rb
|
102
|
+
- lib/colander/parser/null.rb
|
103
|
+
- lib/colander/parser/xls.rb
|
104
|
+
- lib/colander/parser/xlsx.rb
|
105
|
+
- lib/colander/version.rb
|
106
|
+
- spec/base_spec.rb
|
107
|
+
- spec/colander_spec.rb
|
108
|
+
- spec/fixtures/excel95-without-file-suffix
|
109
|
+
- spec/fixtures/excel95.xls
|
110
|
+
- spec/fixtures/new-format.xlsx
|
111
|
+
- spec/fixtures/old-format.xls
|
112
|
+
- spec/spec_helper.rb
|
113
|
+
- spec/xls_spec.rb
|
114
|
+
- spec/xlsx_spec.rb
|
115
|
+
homepage: http://devcorner.mynewsdesk.com
|
116
|
+
licenses: []
|
117
|
+
|
118
|
+
post_install_message:
|
119
|
+
rdoc_options: []
|
120
|
+
|
121
|
+
require_paths:
|
122
|
+
- lib
|
123
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
124
|
+
none: false
|
125
|
+
requirements:
|
126
|
+
- - ">="
|
127
|
+
- !ruby/object:Gem::Version
|
128
|
+
hash: 3
|
129
|
+
segments:
|
130
|
+
- 0
|
131
|
+
version: "0"
|
132
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
133
|
+
none: false
|
134
|
+
requirements:
|
135
|
+
- - ">="
|
136
|
+
- !ruby/object:Gem::Version
|
137
|
+
hash: 3
|
138
|
+
segments:
|
139
|
+
- 0
|
140
|
+
version: "0"
|
141
|
+
requirements: []
|
142
|
+
|
143
|
+
rubyforge_project:
|
144
|
+
rubygems_version: 1.8.10
|
145
|
+
signing_key:
|
146
|
+
specification_version: 3
|
147
|
+
summary: Exctract an array of emails from various file formats
|
148
|
+
test_files:
|
149
|
+
- spec/base_spec.rb
|
150
|
+
- spec/colander_spec.rb
|
151
|
+
- spec/fixtures/excel95-without-file-suffix
|
152
|
+
- spec/fixtures/excel95.xls
|
153
|
+
- spec/fixtures/new-format.xlsx
|
154
|
+
- spec/fixtures/old-format.xls
|
155
|
+
- spec/spec_helper.rb
|
156
|
+
- spec/xls_spec.rb
|
157
|
+
- spec/xlsx_spec.rb
|