colander 0.0.3 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/.rvmrc CHANGED
@@ -1 +1 @@
1
- rvm --create ruby-1.8.7-p302@email_extractor
1
+ rvm --create 1.9.3-p125@colander
data/colander.gemspec CHANGED
@@ -19,7 +19,7 @@ Gem::Specification.new do |s|
19
19
  s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
20
20
  s.require_paths = ["lib"]
21
21
 
22
- s.add_dependency "roo", "~> 1.10.1"
23
22
  s.add_dependency "zip", "~> 2.0.2"
24
23
  s.add_development_dependency "rspec"
24
+ s.add_development_dependency "pry"
25
25
  end
@@ -8,8 +8,31 @@ module Colander
8
8
  end
9
9
 
10
10
  def parse
11
+ @emails = collect_emails
12
+ rescue Exception => e
13
+ raise InvalidFile.new e
14
+ end
15
+
16
+ def payload
11
17
  raise "plz implement me in"
12
18
  end
19
+
20
+ protected
21
+
22
+ def collect_emails
23
+ parse_file.scan(/\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,4}\b/).flatten.uniq
24
+ end
25
+
26
+ def parse_file
27
+ ic = Iconv.new("UTF-8//IGNORE", "UTF-8")
28
+ std_out, std_err, exit_status = Open3.capture3("strings", :stdin_data => payload)
29
+ if exit_status == 0
30
+ ic.iconv(std_out)
31
+ else
32
+ raise RuntimeError.new(std_err)
33
+ end
34
+ end
35
+
13
36
  end
14
37
  end
15
- end
38
+ end
@@ -1,31 +1,15 @@
1
1
  require 'colander/invalid_file'
2
2
  require 'colander/parser/base'
3
- require 'roo'
4
- require 'iconv'
5
-
3
+ require 'open3'
6
4
  module Colander
7
5
  module Parser
8
6
  class Xls < Base
9
- def parse
10
- spreadsheet = parse_file
11
- @emails = collect_emails spreadsheet
12
- rescue Exception => e
13
- raise InvalidFile.new e
14
- end
15
7
 
16
8
  protected
17
9
 
18
- def parse_file
19
- Excel.new(@file_path,nil,:ignore)
10
+ def payload
11
+ File.read(@file_path)
20
12
  end
21
-
22
- def collect_emails(spreadsheet)
23
- spreadsheet.sheets.map do |sheet|
24
- spreadsheet.default_sheet = sheet
25
- spreadsheet.to_yaml.scan(/\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,4}\b/)
26
- end.flatten
27
- end
28
-
29
13
  end
30
14
  end
31
- end
15
+ end
@@ -1,13 +1,21 @@
1
1
  require 'colander/parser/base'
2
- require 'roo'
3
-
2
+ require 'zip'
3
+ require 'iconv'
4
4
  module Colander
5
5
  module Parser
6
6
  class Xlsx < Xls
7
+
7
8
  protected
8
- def parse_file
9
- Excelx.new(@file_path,nil,:ignore)
9
+
10
+ def payload
11
+ ''.tap do |string|
12
+ Zip::ZipInputStream::open(@file_path) do |io|
13
+ while (entry = io.get_next_entry)
14
+ string << io.read
15
+ end
16
+ end
17
+ end
10
18
  end
11
19
  end
12
20
  end
13
- end
21
+ end
@@ -1,3 +1,3 @@
1
1
  module Colander
2
- VERSION = "0.0.3"
2
+ VERSION = "0.1.0"
3
3
  end
data/spec/spec_helper.rb CHANGED
@@ -1,3 +1,4 @@
1
1
  $:.push File.expand_path("../lib", __FILE__)
2
2
 
3
- require 'colander'
3
+ require 'colander'
4
+ require 'pry'
data/spec/xls_spec.rb CHANGED
@@ -3,9 +3,8 @@ require 'spec_helper'
3
3
  describe Colander::Parser::Xls do
4
4
  describe "#parse" do
5
5
  it "stores found emails" do
6
- Excel.stub(:new)
7
6
  parser = Colander::Parser::Xls.new("file/path")
8
- parser.should_receive(:collect_emails).and_return(["bruce@wayne.com"])
7
+ parser.should_receive(:parse_file).and_return("bruce@wayne.com")
9
8
  parser.parse
10
9
  parser.emails.sort.should eql(["bruce@wayne.com"])
11
10
  end
@@ -26,21 +25,19 @@ describe Colander::Parser::Xls do
26
25
  end
27
26
 
28
27
  it "retreives emails from an 95-excel spreadsheet" do
29
- pending "handle encoding error"
30
28
  parser = Colander::Parser::Xls.new("spec/fixtures/excel95.xls")
31
29
  parser.parse
32
- parser.emails.should eql(["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"])
30
+ parser.emails.sort.should eql(["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"].sort)
33
31
  end
34
32
  it "retreives emails from an xls spreadsheet" do
35
33
  parser = Colander::Parser::Xls.new("spec/fixtures/old-format.xls")
36
34
  parser.parse
37
- parser.emails.should eql(["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"])
35
+ parser.emails.sort.should eql(["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"].sort)
38
36
  end
39
37
  it "retreives emails from an 95-excel spreadsheet without file suffix" do
40
- pending "handle encoding error"
41
38
  parser = Colander::Parser::Xls.new("spec/fixtures/excel95-without-file-suffix")
42
39
  parser.parse
43
- parser.emails.should eql(["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"])
40
+ parser.emails.sort.should eql(["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"].sort)
44
41
  end
45
42
  end
46
43
  end
data/spec/xlsx_spec.rb CHANGED
@@ -8,7 +8,7 @@ describe Colander::Parser::Xlsx do
8
8
  it "retreives emails from an xlsx spreadsheet" do
9
9
  parser = Colander::Parser::Xlsx.new("spec/fixtures/new-format.xlsx")
10
10
  parser.parse
11
- parser.emails.should eql(["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"])
11
+ parser.emails.sort.should eql(["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"].sort)
12
12
  end
13
13
  end
14
14
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: colander
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.1.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -10,33 +10,33 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2012-03-01 00:00:00.000000000 Z
13
+ date: 2012-03-21 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
- name: roo
17
- requirement: &70276138479340 !ruby/object:Gem::Requirement
16
+ name: zip
17
+ requirement: &70203123886720 !ruby/object:Gem::Requirement
18
18
  none: false
19
19
  requirements:
20
20
  - - ~>
21
21
  - !ruby/object:Gem::Version
22
- version: 1.10.1
22
+ version: 2.0.2
23
23
  type: :runtime
24
24
  prerelease: false
25
- version_requirements: *70276138479340
25
+ version_requirements: *70203123886720
26
26
  - !ruby/object:Gem::Dependency
27
- name: zip
28
- requirement: &70276138467840 !ruby/object:Gem::Requirement
27
+ name: rspec
28
+ requirement: &70203123886060 !ruby/object:Gem::Requirement
29
29
  none: false
30
30
  requirements:
31
- - - ~>
31
+ - - ! '>='
32
32
  - !ruby/object:Gem::Version
33
- version: 2.0.2
34
- type: :runtime
33
+ version: '0'
34
+ type: :development
35
35
  prerelease: false
36
- version_requirements: *70276138467840
36
+ version_requirements: *70203123886060
37
37
  - !ruby/object:Gem::Dependency
38
- name: rspec
39
- requirement: &70276138466580 !ruby/object:Gem::Requirement
38
+ name: pry
39
+ requirement: &70203123885380 !ruby/object:Gem::Requirement
40
40
  none: false
41
41
  requirements:
42
42
  - - ! '>='
@@ -44,7 +44,7 @@ dependencies:
44
44
  version: '0'
45
45
  type: :development
46
46
  prerelease: false
47
- version_requirements: *70276138466580
47
+ version_requirements: *70203123885380
48
48
  description: Exctract an array of emails from various file formats (xls, xlsx)
49
49
  email:
50
50
  - dev@mynewsdesk.com