colander 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +4 -0
- data/.rvmrc +1 -0
- data/Gemfile +4 -0
- data/README.md +15 -0
- data/Rakefile +2 -0
- data/colander.gemspec +26 -0
- data/lib/colander/invalid_file.rb +4 -0
- data/lib/colander/parser/base.rb +15 -0
- data/lib/colander/parser/null.rb +14 -0
- data/lib/colander/parser/xls.rb +30 -0
- data/lib/colander/parser/xlsx.rb +13 -0
- data/lib/colander/version.rb +3 -0
- data/lib/colander.rb +18 -0
- data/spec/base_spec.rb +10 -0
- data/spec/colander_spec.rb +45 -0
- data/spec/fixtures/excel95-without-file-suffix +0 -0
- data/spec/fixtures/excel95.xls +0 -0
- data/spec/fixtures/new-format.xlsx +0 -0
- data/spec/fixtures/old-format.xls +0 -0
- data/spec/spec_helper.rb +3 -0
- data/spec/xls_spec.rb +36 -0
- data/spec/xlsx_spec.rb +14 -0
- metadata +157 -0
data/.gitignore
ADDED
data/.rvmrc
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
rvm --create ruby-1.8.7-p302@email_extractor
|
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
Colander
|
2
|
+
==============
|
3
|
+
Colander is a gem to collect emails from various file formats.
|
4
|
+
|
5
|
+
Supported formats
|
6
|
+
|
7
|
+
* "Old" MS Excel files, ending in .xls
|
8
|
+
* MS Excel files ending in .xlsx
|
9
|
+
|
10
|
+
Usage
|
11
|
+
-----
|
12
|
+
|
13
|
+
parser = Colander.parse("/path/to/file.xls")
|
14
|
+
parser.emails # => ["foo@bar.com"]
|
15
|
+
|
data/Rakefile
ADDED
data/colander.gemspec
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "colander/version"
|
4
|
+
|
5
|
+
# Gem specification for colander: identity/authorship metadata, the packaged
# file lists (taken from whatever git tracks) and the gem dependencies.
Gem::Specification.new do |spec|
  # -- Identity and authorship ------------------------------------------
  spec.name        = "colander"
  spec.version     = Colander::VERSION
  spec.platform    = Gem::Platform::RUBY
  spec.authors     = ["Kristian Hellquist", "Jonas Forsberg"]
  spec.email       = ["dev@mynewsdesk.com"]
  spec.homepage    = "http://devcorner.mynewsdesk.com"
  spec.summary     = %q{Exctract an array of emails from various file formats}
  spec.description = %q{See summary. lol}

  # spec.rubyforge_project = "mnd_colander"

  # -- Packaged files (each list is produced by a `git ls-files` call) ----
  spec.files         = `git ls-files`.split("\n")
  spec.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  # Executables are listed by bare file name, without the bin/ prefix.
  spec.executables   = `git ls-files -- bin/*`.split("\n").map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # -- Dependencies -----------------------------------------------------
  spec.add_dependency "nokogiri", "~> 1.4.4"
  spec.add_dependency "roo", "~> 1.9.5"
  spec.add_dependency "zip", "~> 2.0.2"
  spec.add_development_dependency "rspec"
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'colander/invalid_file'
|
2
|
+
require 'colander/parser/base'
|
3
|
+
require 'roo'
|
4
|
+
|
5
|
+
module Colander
  module Parser
    # Parser for Excel workbooks opened through roo's Excel reader. Collects
    # every e-mail address that appears on any sheet of the workbook.
    class Xls < Base
      # E-mail matcher. The top-level domain accepts two or more letters so
      # addresses under long TLDs (e.g. ".museum", ".travel") are collected;
      # capping the TLD at four letters would make such addresses fail to
      # match at all because of the trailing word boundary.
      EMAIL_PATTERN = /\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}\b/

      # Opens the workbook and stores the addresses it contains in @emails.
      #
      # Returns the Array of e-mail Strings that were found.
      # Raises InvalidFile when reading the file fails with TypeError or
      # IOError.
      def parse
        workbook = parse_file
        @emails = collect_emails(workbook)
      rescue TypeError, IOError
        raise InvalidFile
      end

      protected

      # Builds the spreadsheet reader for @file_path.
      # NOTE(review): @file_path is presumably assigned by Base#initialize,
      # which is not visible here - confirm.
      # The :ignore argument is passed so that files without an .xls suffix
      # are still opened (the specs parse a fixture without a file suffix).
      def parse_file
        Excel.new(@file_path, nil, :ignore)
      end

      # Scans every sheet of the given spreadsheet for e-mail addresses.
      #
      # spreadsheet - object responding to #sheets, #default_sheet= and
      #               #to_yaml (the reader returned by #parse_file).
      #
      # Returns a flat Array of address Strings in sheet order; duplicates
      # are not removed.
      def collect_emails(spreadsheet)
        per_sheet = spreadsheet.sheets.map do |sheet|
          # to_yaml dumps the currently selected sheet, so select it first.
          spreadsheet.default_sheet = sheet
          spreadsheet.to_yaml.scan(EMAIL_PATTERN)
        end
        per_sheet.flatten
      end

    end
  end
end
|
data/lib/colander.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'colander/invalid_file'
|
2
|
+
require 'colander/parser/xls'
|
3
|
+
require 'colander/parser/xlsx'
|
4
|
+
|
5
|
+
module Colander
  # Chooses a parser from the file extension, runs it and returns it.
  #
  # file_path - String path of the file to read.
  # file_name - Optional String name used only to detect the extension
  #             (useful when file_path itself carries no suffix).
  #
  # The extension is compared case-insensitively, so "LIST.XLS" and
  # "list.xls" are treated alike.
  #
  # Returns the parser instance after its #parse method has been called.
  # Raises InvalidFile when the extension is not "xls" or "xlsx".
  def self.parse(file_path, file_name = nil)
    # to_s guards the empty-string case, where split yields no parts at all.
    extension = (file_name || file_path).split(".").last.to_s.downcase

    parser = case extension
             when "xls"  then Parser::Xls.new(file_path)
             when "xlsx" then Parser::Xlsx.new(file_path)
             else raise InvalidFile
             end

    parser.parse
    parser
  end
end
|
data/spec/base_spec.rb
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for the Colander.parse entry point: argument checking, parser
# selection by file extension, and delegation to the parser's #parse.
describe Colander do
  describe ".parse" do
    it "should raise error if no file path is passed" do
      lambda { Colander.parse }.should raise_exception(ArgumentError)
    end

    it "should raise error if file is unsupported" do
      lambda { Colander.parse("foo.bar") }.should raise_error(Colander::InvalidFile)
    end

    it "should not raise error if a file path is passed" do
      lambda { Colander.parse("/file/path") }.should_not raise_exception(ArgumentError)
    end

    it "accepts an option file name and returns correct parser" do
      # Stub #parse so no real file is read.
      Colander::Parser::Xls.any_instance.stub(:parse)
      parser = Colander.parse("/file/path", "apa.xls")
      parser.should be_a(Colander::Parser::Xls)
    end

    it "should recognize a xls file and return correct parser" do
      Colander::Parser::Xls.any_instance.stub(:parse)
      parser = Colander.parse("/foo/apa.xls")
      parser.should be_a(Colander::Parser::Xls)
    end

    it "should recognize a xlsx file and return correct parser" do
      Colander::Parser::Xlsx.any_instance.stub(:parse)
      parser = Colander.parse("/foo/apa.xlsx")
      parser.should be_a(Colander::Parser::Xlsx)
    end

    it "should invoke parse method on the parser" do
      Colander::Parser::Xlsx.any_instance.should_receive(:parse)
      Colander.parse("/foo/bar.xlsx")
    end
  end
end
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
data/spec/spec_helper.rb
ADDED
data/spec/xls_spec.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for the Xls parser: storing collected e-mails, rejecting invalid
# files, and reading the bundled spreadsheet fixtures.
describe Colander::Parser::Xls do
  describe "#parse" do
    # Addresses every spreadsheet fixture is expected to yield, in order.
    fixture_emails = ["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"]

    it "stores found emails" do
      # No real workbook is opened; only the hand-off to #emails is checked.
      Excel.stub(:new)
      xls = Colander::Parser::Xls.new("file/path")
      xls.should_receive(:collect_emails).and_return(["bruce@wayne.com"])
      xls.parse
      xls.emails.sort.should eql(["bruce@wayne.com"])
    end

    it "raises exception if file is invalid" do
      # This spec file itself serves as the non-spreadsheet input.
      xls = Colander::Parser::Xls.new("./#{__FILE__}")
      lambda { xls.parse }.should raise_error(Colander::InvalidFile)
    end

    it "retreives emails from an 95-excel spreadsheet" do
      xls = Colander::Parser::Xls.new("spec/fixtures/excel95.xls")
      xls.parse
      xls.emails.should eql(fixture_emails)
    end

    it "retreives emails from an xls spreadsheet" do
      xls = Colander::Parser::Xls.new("spec/fixtures/old-format.xls")
      xls.parse
      xls.emails.should eql(fixture_emails)
    end

    it "retreives emails from an 95-excel spreadsheet without file suffix" do
      xls = Colander::Parser::Xls.new("spec/fixtures/excel95-without-file-suffix")
      xls.parse
      xls.emails.should eql(fixture_emails)
    end
  end
end
|
data/spec/xlsx_spec.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
# Specs for the Xlsx parser: its relationship to the Xls parser and reading
# the bundled .xlsx fixture.
describe Colander::Parser::Xlsx do
  it "is subclass of Colander::Parser::Xls" do
    xlsx = Colander::Parser::Xlsx.new("foo/bar")
    xlsx.should be_a(Colander::Parser::Xls)
  end

  describe "#parse" do
    it "retreives emails from an xlsx spreadsheet" do
      expected = ["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"]
      xlsx = Colander::Parser::Xlsx.new("spec/fixtures/new-format.xlsx")
      xlsx.parse
      xlsx.emails.should eql(expected)
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,157 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: colander
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 29
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
- 1
|
10
|
+
version: 0.0.1
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Kristian Hellquist
|
14
|
+
- Jonas Forsberg
|
15
|
+
autorequire:
|
16
|
+
bindir: bin
|
17
|
+
cert_chain: []
|
18
|
+
|
19
|
+
date: 2011-08-29 00:00:00 Z
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
name: nokogiri
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ~>
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 15
|
30
|
+
segments:
|
31
|
+
- 1
|
32
|
+
- 4
|
33
|
+
- 4
|
34
|
+
version: 1.4.4
|
35
|
+
type: :runtime
|
36
|
+
version_requirements: *id001
|
37
|
+
- !ruby/object:Gem::Dependency
|
38
|
+
name: roo
|
39
|
+
prerelease: false
|
40
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ~>
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
hash: 57
|
46
|
+
segments:
|
47
|
+
- 1
|
48
|
+
- 9
|
49
|
+
- 5
|
50
|
+
version: 1.9.5
|
51
|
+
type: :runtime
|
52
|
+
version_requirements: *id002
|
53
|
+
- !ruby/object:Gem::Dependency
|
54
|
+
name: zip
|
55
|
+
prerelease: false
|
56
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ~>
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
hash: 11
|
62
|
+
segments:
|
63
|
+
- 2
|
64
|
+
- 0
|
65
|
+
- 2
|
66
|
+
version: 2.0.2
|
67
|
+
type: :runtime
|
68
|
+
version_requirements: *id003
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rspec
|
71
|
+
prerelease: false
|
72
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ">="
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
hash: 3
|
78
|
+
segments:
|
79
|
+
- 0
|
80
|
+
version: "0"
|
81
|
+
type: :development
|
82
|
+
version_requirements: *id004
|
83
|
+
description: See summary. lol
|
84
|
+
email:
|
85
|
+
- dev@mynewsdesk.com
|
86
|
+
executables: []
|
87
|
+
|
88
|
+
extensions: []
|
89
|
+
|
90
|
+
extra_rdoc_files: []
|
91
|
+
|
92
|
+
files:
|
93
|
+
- .gitignore
|
94
|
+
- .rvmrc
|
95
|
+
- Gemfile
|
96
|
+
- README.md
|
97
|
+
- Rakefile
|
98
|
+
- colander.gemspec
|
99
|
+
- lib/colander.rb
|
100
|
+
- lib/colander/invalid_file.rb
|
101
|
+
- lib/colander/parser/base.rb
|
102
|
+
- lib/colander/parser/null.rb
|
103
|
+
- lib/colander/parser/xls.rb
|
104
|
+
- lib/colander/parser/xlsx.rb
|
105
|
+
- lib/colander/version.rb
|
106
|
+
- spec/base_spec.rb
|
107
|
+
- spec/colander_spec.rb
|
108
|
+
- spec/fixtures/excel95-without-file-suffix
|
109
|
+
- spec/fixtures/excel95.xls
|
110
|
+
- spec/fixtures/new-format.xlsx
|
111
|
+
- spec/fixtures/old-format.xls
|
112
|
+
- spec/spec_helper.rb
|
113
|
+
- spec/xls_spec.rb
|
114
|
+
- spec/xlsx_spec.rb
|
115
|
+
homepage: http://devcorner.mynewsdesk.com
|
116
|
+
licenses: []
|
117
|
+
|
118
|
+
post_install_message:
|
119
|
+
rdoc_options: []
|
120
|
+
|
121
|
+
require_paths:
|
122
|
+
- lib
|
123
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
124
|
+
none: false
|
125
|
+
requirements:
|
126
|
+
- - ">="
|
127
|
+
- !ruby/object:Gem::Version
|
128
|
+
hash: 3
|
129
|
+
segments:
|
130
|
+
- 0
|
131
|
+
version: "0"
|
132
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
133
|
+
none: false
|
134
|
+
requirements:
|
135
|
+
- - ">="
|
136
|
+
- !ruby/object:Gem::Version
|
137
|
+
hash: 3
|
138
|
+
segments:
|
139
|
+
- 0
|
140
|
+
version: "0"
|
141
|
+
requirements: []
|
142
|
+
|
143
|
+
rubyforge_project:
|
144
|
+
rubygems_version: 1.8.10
|
145
|
+
signing_key:
|
146
|
+
specification_version: 3
|
147
|
+
summary: Exctract an array of emails from various file formats
|
148
|
+
test_files:
|
149
|
+
- spec/base_spec.rb
|
150
|
+
- spec/colander_spec.rb
|
151
|
+
- spec/fixtures/excel95-without-file-suffix
|
152
|
+
- spec/fixtures/excel95.xls
|
153
|
+
- spec/fixtures/new-format.xlsx
|
154
|
+
- spec/fixtures/old-format.xls
|
155
|
+
- spec/spec_helper.rb
|
156
|
+
- spec/xls_spec.rb
|
157
|
+
- spec/xlsx_spec.rb
|