colander 0.0.3 → 0.1.0

This diff shows the differences between the contents of two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
data/.rvmrc CHANGED
@@ -1 +1 @@
1
- rvm --create ruby-1.8.7-p302@email_extractor
1
+ rvm --create 1.9.3-p125@colander
data/colander.gemspec CHANGED
@@ -19,7 +19,7 @@ Gem::Specification.new do |s|
19
19
  s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
20
20
  s.require_paths = ["lib"]
21
21
 
22
- s.add_dependency "roo", "~> 1.10.1"
23
22
  s.add_dependency "zip", "~> 2.0.2"
24
23
  s.add_development_dependency "rspec"
24
+ s.add_development_dependency "pry"
25
25
  end
@@ -8,8 +8,31 @@ module Colander
8
8
  end
9
9
 
10
10
  def parse
11
+ @emails = collect_emails
12
+ rescue Exception => e
13
+ raise InvalidFile.new e
14
+ end
15
+
16
+ def payload
11
17
  raise "plz implement me in"
12
18
  end
19
+
20
+ protected
21
+
22
+ def collect_emails
23
+ parse_file.scan(/\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,4}\b/).flatten.uniq
24
+ end
25
+
26
+ def parse_file
27
+ ic = Iconv.new("UTF-8//IGNORE", "UTF-8")
28
+ std_out, std_err, exit_status = Open3.capture3("strings", :stdin_data => payload)
29
+ if exit_status == 0
30
+ ic.iconv(std_out)
31
+ else
32
+ raise RuntimeError.new(std_err)
33
+ end
34
+ end
35
+
13
36
  end
14
37
  end
15
- end
38
+ end
@@ -1,31 +1,15 @@
1
1
  require 'colander/invalid_file'
2
2
  require 'colander/parser/base'
3
- require 'roo'
4
- require 'iconv'
5
-
3
+ require 'open3'
6
4
  module Colander
7
5
  module Parser
8
6
  class Xls < Base
9
- def parse
10
- spreadsheet = parse_file
11
- @emails = collect_emails spreadsheet
12
- rescue Exception => e
13
- raise InvalidFile.new e
14
- end
15
7
 
16
8
  protected
17
9
 
18
- def parse_file
19
- Excel.new(@file_path,nil,:ignore)
10
+ def payload
11
+ File.read(@file_path)
20
12
  end
21
-
22
- def collect_emails(spreadsheet)
23
- spreadsheet.sheets.map do |sheet|
24
- spreadsheet.default_sheet = sheet
25
- spreadsheet.to_yaml.scan(/\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,4}\b/)
26
- end.flatten
27
- end
28
-
29
13
  end
30
14
  end
31
- end
15
+ end
@@ -1,13 +1,21 @@
1
1
  require 'colander/parser/base'
2
- require 'roo'
3
-
2
+ require 'zip'
3
+ require 'iconv'
4
4
  module Colander
5
5
  module Parser
6
6
  class Xlsx < Xls
7
+
7
8
  protected
8
- def parse_file
9
- Excelx.new(@file_path,nil,:ignore)
9
+
10
+ def payload
11
+ ''.tap do |string|
12
+ Zip::ZipInputStream::open(@file_path) do |io|
13
+ while (entry = io.get_next_entry)
14
+ string << io.read
15
+ end
16
+ end
17
+ end
10
18
  end
11
19
  end
12
20
  end
13
- end
21
+ end
@@ -1,3 +1,3 @@
1
1
  module Colander
2
- VERSION = "0.0.3"
2
+ VERSION = "0.1.0"
3
3
  end
data/spec/spec_helper.rb CHANGED
@@ -1,3 +1,4 @@
1
1
  $:.push File.expand_path("../lib", __FILE__)
2
2
 
3
- require 'colander'
3
+ require 'colander'
4
+ require 'pry'
data/spec/xls_spec.rb CHANGED
@@ -3,9 +3,8 @@ require 'spec_helper'
3
3
  describe Colander::Parser::Xls do
4
4
  describe "#parse" do
5
5
  it "stores found emails" do
6
- Excel.stub(:new)
7
6
  parser = Colander::Parser::Xls.new("file/path")
8
- parser.should_receive(:collect_emails).and_return(["bruce@wayne.com"])
7
+ parser.should_receive(:parse_file).and_return("bruce@wayne.com")
9
8
  parser.parse
10
9
  parser.emails.sort.should eql(["bruce@wayne.com"])
11
10
  end
@@ -26,21 +25,19 @@ describe Colander::Parser::Xls do
26
25
  end
27
26
 
28
27
  it "retreives emails from an 95-excel spreadsheet" do
29
- pending "handle encoding error"
30
28
  parser = Colander::Parser::Xls.new("spec/fixtures/excel95.xls")
31
29
  parser.parse
32
- parser.emails.should eql(["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"])
30
+ parser.emails.sort.should eql(["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"].sort)
33
31
  end
34
32
  it "retreives emails from an xls spreadsheet" do
35
33
  parser = Colander::Parser::Xls.new("spec/fixtures/old-format.xls")
36
34
  parser.parse
37
- parser.emails.should eql(["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"])
35
+ parser.emails.sort.should eql(["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"].sort)
38
36
  end
39
37
  it "retreives emails from an 95-excel spreadsheet without file suffix" do
40
- pending "handle encoding error"
41
38
  parser = Colander::Parser::Xls.new("spec/fixtures/excel95-without-file-suffix")
42
39
  parser.parse
43
- parser.emails.should eql(["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"])
40
+ parser.emails.sort.should eql(["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"].sort)
44
41
  end
45
42
  end
46
43
  end
data/spec/xlsx_spec.rb CHANGED
@@ -8,7 +8,7 @@ describe Colander::Parser::Xlsx do
8
8
  it "retreives emails from an xlsx spreadsheet" do
9
9
  parser = Colander::Parser::Xlsx.new("spec/fixtures/new-format.xlsx")
10
10
  parser.parse
11
- parser.emails.should eql(["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"])
11
+ parser.emails.sort.should eql(["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"].sort)
12
12
  end
13
13
  end
14
14
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: colander
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.1.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -10,33 +10,33 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2012-03-01 00:00:00.000000000 Z
13
+ date: 2012-03-21 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
- name: roo
17
- requirement: &70276138479340 !ruby/object:Gem::Requirement
16
+ name: zip
17
+ requirement: &70203123886720 !ruby/object:Gem::Requirement
18
18
  none: false
19
19
  requirements:
20
20
  - - ~>
21
21
  - !ruby/object:Gem::Version
22
- version: 1.10.1
22
+ version: 2.0.2
23
23
  type: :runtime
24
24
  prerelease: false
25
- version_requirements: *70276138479340
25
+ version_requirements: *70203123886720
26
26
  - !ruby/object:Gem::Dependency
27
- name: zip
28
- requirement: &70276138467840 !ruby/object:Gem::Requirement
27
+ name: rspec
28
+ requirement: &70203123886060 !ruby/object:Gem::Requirement
29
29
  none: false
30
30
  requirements:
31
- - - ~>
31
+ - - ! '>='
32
32
  - !ruby/object:Gem::Version
33
- version: 2.0.2
34
- type: :runtime
33
+ version: '0'
34
+ type: :development
35
35
  prerelease: false
36
- version_requirements: *70276138467840
36
+ version_requirements: *70203123886060
37
37
  - !ruby/object:Gem::Dependency
38
- name: rspec
39
- requirement: &70276138466580 !ruby/object:Gem::Requirement
38
+ name: pry
39
+ requirement: &70203123885380 !ruby/object:Gem::Requirement
40
40
  none: false
41
41
  requirements:
42
42
  - - ! '>='
@@ -44,7 +44,7 @@ dependencies:
44
44
  version: '0'
45
45
  type: :development
46
46
  prerelease: false
47
- version_requirements: *70276138466580
47
+ version_requirements: *70203123885380
48
48
  description: Exctract an array of emails from various file formats (xls, xlsx)
49
49
  email:
50
50
  - dev@mynewsdesk.com