colander 0.0.3 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.rvmrc +1 -1
- data/colander.gemspec +1 -1
- data/lib/colander/parser/base.rb +24 -1
- data/lib/colander/parser/xls.rb +4 -20
- data/lib/colander/parser/xlsx.rb +13 -5
- data/lib/colander/version.rb +1 -1
- data/spec/spec_helper.rb +2 -1
- data/spec/xls_spec.rb +4 -7
- data/spec/xlsx_spec.rb +1 -1
- metadata +15 -15
data/.rvmrc
CHANGED
@@ -1 +1 @@
|
|
1
|
-
rvm --create
|
1
|
+
rvm --create 1.9.3-p125@colander
|
data/colander.gemspec
CHANGED
@@ -19,7 +19,7 @@ Gem::Specification.new do |s|
|
|
19
19
|
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
20
20
|
s.require_paths = ["lib"]
|
21
21
|
|
22
|
-
s.add_dependency "roo", "~> 1.10.1"
|
23
22
|
s.add_dependency "zip", "~> 2.0.2"
|
24
23
|
s.add_development_dependency "rspec"
|
24
|
+
s.add_development_dependency "pry"
|
25
25
|
end
|
data/lib/colander/parser/base.rb
CHANGED
@@ -8,8 +8,31 @@ module Colander
|
|
8
8
|
end
|
9
9
|
|
10
10
|
def parse
|
11
|
+
@emails = collect_emails
|
12
|
+
rescue Exception => e
|
13
|
+
raise InvalidFile.new e
|
14
|
+
end
|
15
|
+
|
16
|
+
def payload
|
11
17
|
raise "plz implement me in"
|
12
18
|
end
|
19
|
+
|
20
|
+
protected
|
21
|
+
|
22
|
+
def collect_emails
|
23
|
+
parse_file.scan(/\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,4}\b/).flatten.uniq
|
24
|
+
end
|
25
|
+
|
26
|
+
def parse_file
|
27
|
+
ic = Iconv.new("UTF-8//IGNORE", "UTF-8")
|
28
|
+
std_out, std_err, exit_status = Open3.capture3("strings", :stdin_data => payload)
|
29
|
+
if exit_status == 0
|
30
|
+
ic.iconv(std_out)
|
31
|
+
else
|
32
|
+
raise RuntimeError.new(std_err)
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
13
36
|
end
|
14
37
|
end
|
15
|
-
end
|
38
|
+
end
|
data/lib/colander/parser/xls.rb
CHANGED
@@ -1,31 +1,15 @@
|
|
1
1
|
require 'colander/invalid_file'
|
2
2
|
require 'colander/parser/base'
|
3
|
-
require 'roo'
|
4
|
-
require 'iconv'
|
5
|
-
|
3
|
+
require 'open3'
|
6
4
|
module Colander
|
7
5
|
module Parser
|
8
6
|
class Xls < Base
|
9
|
-
def parse
|
10
|
-
spreadsheet = parse_file
|
11
|
-
@emails = collect_emails spreadsheet
|
12
|
-
rescue Exception => e
|
13
|
-
raise InvalidFile.new e
|
14
|
-
end
|
15
7
|
|
16
8
|
protected
|
17
9
|
|
18
|
-
def
|
19
|
-
|
10
|
+
def payload
|
11
|
+
File.read(@file_path)
|
20
12
|
end
|
21
|
-
|
22
|
-
def collect_emails(spreadsheet)
|
23
|
-
spreadsheet.sheets.map do |sheet|
|
24
|
-
spreadsheet.default_sheet = sheet
|
25
|
-
spreadsheet.to_yaml.scan(/\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,4}\b/)
|
26
|
-
end.flatten
|
27
|
-
end
|
28
|
-
|
29
13
|
end
|
30
14
|
end
|
31
|
-
end
|
15
|
+
end
|
data/lib/colander/parser/xlsx.rb
CHANGED
@@ -1,13 +1,21 @@
|
|
1
1
|
require 'colander/parser/base'
|
2
|
-
require '
|
3
|
-
|
2
|
+
require 'zip'
|
3
|
+
require 'iconv'
|
4
4
|
module Colander
|
5
5
|
module Parser
|
6
6
|
class Xlsx < Xls
|
7
|
+
|
7
8
|
protected
|
8
|
-
|
9
|
-
|
9
|
+
|
10
|
+
def payload
|
11
|
+
''.tap do |string|
|
12
|
+
Zip::ZipInputStream::open(@file_path) do |io|
|
13
|
+
while (entry = io.get_next_entry)
|
14
|
+
string << io.read
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
10
18
|
end
|
11
19
|
end
|
12
20
|
end
|
13
|
-
end
|
21
|
+
end
|
data/lib/colander/version.rb
CHANGED
data/spec/spec_helper.rb
CHANGED
data/spec/xls_spec.rb
CHANGED
@@ -3,9 +3,8 @@ require 'spec_helper'
|
|
3
3
|
describe Colander::Parser::Xls do
|
4
4
|
describe "#parse" do
|
5
5
|
it "stores found emails" do
|
6
|
-
Excel.stub(:new)
|
7
6
|
parser = Colander::Parser::Xls.new("file/path")
|
8
|
-
parser.should_receive(:
|
7
|
+
parser.should_receive(:parse_file).and_return("bruce@wayne.com")
|
9
8
|
parser.parse
|
10
9
|
parser.emails.sort.should eql(["bruce@wayne.com"])
|
11
10
|
end
|
@@ -26,21 +25,19 @@ describe Colander::Parser::Xls do
|
|
26
25
|
end
|
27
26
|
|
28
27
|
it "retreives emails from an 95-excel spreadsheet" do
|
29
|
-
pending "handle encoding error"
|
30
28
|
parser = Colander::Parser::Xls.new("spec/fixtures/excel95.xls")
|
31
29
|
parser.parse
|
32
|
-
parser.emails.should eql(["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"])
|
30
|
+
parser.emails.sort.should eql(["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"].sort)
|
33
31
|
end
|
34
32
|
it "retreives emails from an xls spreadsheet" do
|
35
33
|
parser = Colander::Parser::Xls.new("spec/fixtures/old-format.xls")
|
36
34
|
parser.parse
|
37
|
-
parser.emails.should eql(["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"])
|
35
|
+
parser.emails.sort.should eql(["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"].sort)
|
38
36
|
end
|
39
37
|
it "retreives emails from an 95-excel spreadsheet without file suffix" do
|
40
|
-
pending "handle encoding error"
|
41
38
|
parser = Colander::Parser::Xls.new("spec/fixtures/excel95-without-file-suffix")
|
42
39
|
parser.parse
|
43
|
-
parser.emails.should eql(["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"])
|
40
|
+
parser.emails.sort.should eql(["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"].sort)
|
44
41
|
end
|
45
42
|
end
|
46
43
|
end
|
data/spec/xlsx_spec.rb
CHANGED
@@ -8,7 +8,7 @@ describe Colander::Parser::Xlsx do
|
|
8
8
|
it "retreives emails from an xlsx spreadsheet" do
|
9
9
|
parser = Colander::Parser::Xlsx.new("spec/fixtures/new-format.xlsx")
|
10
10
|
parser.parse
|
11
|
-
parser.emails.should eql(["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"])
|
11
|
+
parser.emails.sort.should eql(["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"].sort)
|
12
12
|
end
|
13
13
|
end
|
14
14
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: colander
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -10,33 +10,33 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2012-03-
|
13
|
+
date: 2012-03-21 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
|
-
name: roo
|
17
|
-
requirement: &
|
16
|
+
name: zip
|
17
|
+
requirement: &70203123886720 !ruby/object:Gem::Requirement
|
18
18
|
none: false
|
19
19
|
requirements:
|
20
20
|
- - ~>
|
21
21
|
- !ruby/object:Gem::Version
|
22
|
-
version: 1.10.1
|
22
|
+
version: 2.0.2
|
23
23
|
type: :runtime
|
24
24
|
prerelease: false
|
25
|
-
version_requirements: *
|
25
|
+
version_requirements: *70203123886720
|
26
26
|
- !ruby/object:Gem::Dependency
|
27
|
-
name: zip
|
28
|
-
requirement: &
|
27
|
+
name: rspec
|
28
|
+
requirement: &70203123886060 !ruby/object:Gem::Requirement
|
29
29
|
none: false
|
30
30
|
requirements:
|
31
|
-
- - ~>
|
31
|
+
- - ! '>='
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: 2.0.2
|
34
|
-
type: :runtime
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
35
|
prerelease: false
|
36
|
-
version_requirements: *
|
36
|
+
version_requirements: *70203123886060
|
37
37
|
- !ruby/object:Gem::Dependency
|
38
|
-
name: rspec
|
39
|
-
requirement: &
|
38
|
+
name: pry
|
39
|
+
requirement: &70203123885380 !ruby/object:Gem::Requirement
|
40
40
|
none: false
|
41
41
|
requirements:
|
42
42
|
- - ! '>='
|
@@ -44,7 +44,7 @@ dependencies:
|
|
44
44
|
version: '0'
|
45
45
|
type: :development
|
46
46
|
prerelease: false
|
47
|
-
version_requirements: *
|
47
|
+
version_requirements: *70203123885380
|
48
48
|
description: Exctract an array of emails from various file formats (xls, xlsx)
|
49
49
|
email:
|
50
50
|
- dev@mynewsdesk.com
|