colander 0.0.3 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.rvmrc +1 -1
- data/colander.gemspec +1 -1
- data/lib/colander/parser/base.rb +24 -1
- data/lib/colander/parser/xls.rb +4 -20
- data/lib/colander/parser/xlsx.rb +13 -5
- data/lib/colander/version.rb +1 -1
- data/spec/spec_helper.rb +2 -1
- data/spec/xls_spec.rb +4 -7
- data/spec/xlsx_spec.rb +1 -1
- metadata +15 -15
data/.rvmrc
CHANGED
@@ -1 +1 @@
|
|
1
|
-
rvm --create
|
1
|
+
rvm --create 1.9.3-p125@colander
|
data/colander.gemspec
CHANGED
@@ -19,7 +19,7 @@ Gem::Specification.new do |s|
|
|
19
19
|
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
20
20
|
s.require_paths = ["lib"]
|
21
21
|
|
22
|
-
s.add_dependency "roo", "~> 1.10.1"
|
23
22
|
s.add_dependency "zip", "~> 2.0.2"
|
24
23
|
s.add_development_dependency "rspec"
|
24
|
+
s.add_development_dependency "pry"
|
25
25
|
end
|
data/lib/colander/parser/base.rb
CHANGED
@@ -8,8 +8,31 @@ module Colander
|
|
8
8
|
end
|
9
9
|
|
10
10
|
def parse
|
11
|
+
@emails = collect_emails
|
12
|
+
rescue Exception => e
|
13
|
+
raise InvalidFile.new e
|
14
|
+
end
|
15
|
+
|
16
|
+
def payload
|
11
17
|
raise "plz implement me in"
|
12
18
|
end
|
19
|
+
|
20
|
+
protected
|
21
|
+
|
22
|
+
def collect_emails
|
23
|
+
parse_file.scan(/\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,4}\b/).flatten.uniq
|
24
|
+
end
|
25
|
+
|
26
|
+
def parse_file
|
27
|
+
ic = Iconv.new("UTF-8//IGNORE", "UTF-8")
|
28
|
+
std_out, std_err, exit_status = Open3.capture3("strings", :stdin_data => payload)
|
29
|
+
if exit_status == 0
|
30
|
+
ic.iconv(std_out)
|
31
|
+
else
|
32
|
+
raise RuntimeError.new(std_err)
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
13
36
|
end
|
14
37
|
end
|
15
|
-
end
|
38
|
+
end
|
data/lib/colander/parser/xls.rb
CHANGED
@@ -1,31 +1,15 @@
|
|
1
1
|
require 'colander/invalid_file'
|
2
2
|
require 'colander/parser/base'
|
3
|
-
require 'roo'
|
4
|
-
require 'iconv'
|
5
|
-
|
3
|
+
require 'open3'
|
6
4
|
module Colander
|
7
5
|
module Parser
|
8
6
|
class Xls < Base
|
9
|
-
def parse
|
10
|
-
spreadsheet = parse_file
|
11
|
-
@emails = collect_emails spreadsheet
|
12
|
-
rescue Exception => e
|
13
|
-
raise InvalidFile.new e
|
14
|
-
end
|
15
7
|
|
16
8
|
protected
|
17
9
|
|
18
|
-
def parse_file
|
19
|
-
|
10
|
+
def payload
|
11
|
+
File.read(@file_path)
|
20
12
|
end
|
21
|
-
|
22
|
-
def collect_emails(spreadsheet)
|
23
|
-
spreadsheet.sheets.map do |sheet|
|
24
|
-
spreadsheet.default_sheet = sheet
|
25
|
-
spreadsheet.to_yaml.scan(/\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,4}\b/)
|
26
|
-
end.flatten
|
27
|
-
end
|
28
|
-
|
29
13
|
end
|
30
14
|
end
|
31
|
-
end
|
15
|
+
end
|
data/lib/colander/parser/xlsx.rb
CHANGED
@@ -1,13 +1,21 @@
|
|
1
1
|
require 'colander/parser/base'
|
2
|
-
require '
|
3
|
-
|
2
|
+
require 'zip'
|
3
|
+
require 'iconv'
|
4
4
|
module Colander
|
5
5
|
module Parser
|
6
6
|
class Xlsx < Xls
|
7
|
+
|
7
8
|
protected
|
8
|
-
|
9
|
-
|
9
|
+
|
10
|
+
def payload
|
11
|
+
''.tap do |string|
|
12
|
+
Zip::ZipInputStream::open(@file_path) do |io|
|
13
|
+
while (entry = io.get_next_entry)
|
14
|
+
string << io.read
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
10
18
|
end
|
11
19
|
end
|
12
20
|
end
|
13
|
-
end
|
21
|
+
end
|
data/lib/colander/version.rb
CHANGED
data/spec/spec_helper.rb
CHANGED
data/spec/xls_spec.rb
CHANGED
@@ -3,9 +3,8 @@ require 'spec_helper'
|
|
3
3
|
describe Colander::Parser::Xls do
|
4
4
|
describe "#parse" do
|
5
5
|
it "stores found emails" do
|
6
|
-
Excel.stub(:new)
|
7
6
|
parser = Colander::Parser::Xls.new("file/path")
|
8
|
-
parser.should_receive(:
|
7
|
+
parser.should_receive(:parse_file).and_return("bruce@wayne.com")
|
9
8
|
parser.parse
|
10
9
|
parser.emails.sort.should eql(["bruce@wayne.com"])
|
11
10
|
end
|
@@ -26,21 +25,19 @@ describe Colander::Parser::Xls do
|
|
26
25
|
end
|
27
26
|
|
28
27
|
it "retreives emails from an 95-excel spreadsheet" do
|
29
|
-
pending "handle encoding error"
|
30
28
|
parser = Colander::Parser::Xls.new("spec/fixtures/excel95.xls")
|
31
29
|
parser.parse
|
32
|
-
parser.emails.should eql(["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"])
|
30
|
+
parser.emails.sort.should eql(["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"].sort)
|
33
31
|
end
|
34
32
|
it "retreives emails from an xls spreadsheet" do
|
35
33
|
parser = Colander::Parser::Xls.new("spec/fixtures/old-format.xls")
|
36
34
|
parser.parse
|
37
|
-
parser.emails.should eql(["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"])
|
35
|
+
parser.emails.sort.should eql(["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"].sort)
|
38
36
|
end
|
39
37
|
it "retreives emails from an 95-excel spreadsheet without file suffix" do
|
40
|
-
pending "handle encoding error"
|
41
38
|
parser = Colander::Parser::Xls.new("spec/fixtures/excel95-without-file-suffix")
|
42
39
|
parser.parse
|
43
|
-
parser.emails.should eql(["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"])
|
40
|
+
parser.emails.sort.should eql(["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"].sort)
|
44
41
|
end
|
45
42
|
end
|
46
43
|
end
|
data/spec/xlsx_spec.rb
CHANGED
@@ -8,7 +8,7 @@ describe Colander::Parser::Xlsx do
|
|
8
8
|
it "retreives emails from an xlsx spreadsheet" do
|
9
9
|
parser = Colander::Parser::Xlsx.new("spec/fixtures/new-format.xlsx")
|
10
10
|
parser.parse
|
11
|
-
parser.emails.should eql(["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"])
|
11
|
+
parser.emails.sort.should eql(["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"].sort)
|
12
12
|
end
|
13
13
|
end
|
14
14
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: colander
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -10,33 +10,33 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2012-03-
|
13
|
+
date: 2012-03-21 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
|
-
name:
|
17
|
-
requirement: &
|
16
|
+
name: zip
|
17
|
+
requirement: &70203123886720 !ruby/object:Gem::Requirement
|
18
18
|
none: false
|
19
19
|
requirements:
|
20
20
|
- - ~>
|
21
21
|
- !ruby/object:Gem::Version
|
22
|
-
version:
|
22
|
+
version: 2.0.2
|
23
23
|
type: :runtime
|
24
24
|
prerelease: false
|
25
|
-
version_requirements: *
|
25
|
+
version_requirements: *70203123886720
|
26
26
|
- !ruby/object:Gem::Dependency
|
27
|
-
name:
|
28
|
-
requirement: &
|
27
|
+
name: rspec
|
28
|
+
requirement: &70203123886060 !ruby/object:Gem::Requirement
|
29
29
|
none: false
|
30
30
|
requirements:
|
31
|
-
- -
|
31
|
+
- - ! '>='
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
34
|
-
type: :
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
35
|
prerelease: false
|
36
|
-
version_requirements: *
|
36
|
+
version_requirements: *70203123886060
|
37
37
|
- !ruby/object:Gem::Dependency
|
38
|
-
name:
|
39
|
-
requirement: &
|
38
|
+
name: pry
|
39
|
+
requirement: &70203123885380 !ruby/object:Gem::Requirement
|
40
40
|
none: false
|
41
41
|
requirements:
|
42
42
|
- - ! '>='
|
@@ -44,7 +44,7 @@ dependencies:
|
|
44
44
|
version: '0'
|
45
45
|
type: :development
|
46
46
|
prerelease: false
|
47
|
-
version_requirements: *
|
47
|
+
version_requirements: *70203123885380
|
48
48
|
description: Exctract an array of emails from various file formats (xls, xlsx)
|
49
49
|
email:
|
50
50
|
- dev@mynewsdesk.com
|