embulk-parser-roo-excel 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 64fc22e2d5dbfb23ecfe3ed1c79052eaf6e50c78
4
+ data.tar.gz: 1562aedacddcecbd1fcbd4a50309f23110f7f03d
5
+ SHA512:
6
+ metadata.gz: 23f9f8c19103d5ca363eed6ec2d5bcf2a8c317c06b00778021820ea3b5df18f81a17023ec87021b56aff929147d03410156a770d5e1f9a9b09ac3be417b96d22
7
+ data.tar.gz: 3e540d730f174ba917d5df6f5168dab6de6030070a3a5f87f4d0230e6b0b4a810faf80f3570efd9048a0f36046a1c90ec8de6c352c7be4141ad91dca6b9c39a3
@@ -0,0 +1,5 @@
1
+ *~
2
+ /pkg/
3
+ /tmp/
4
+ /.bundle/
5
+ /Gemfile.lock
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
# Resolve gems from the public RubyGems index.
source "https://rubygems.org/"

# Take the dependency list from this project's gemspec file.
gemspec
@@ -0,0 +1,21 @@
1
+
2
+ MIT License
3
+
4
+ Permission is hereby granted, free of charge, to any person obtaining
5
+ a copy of this software and associated documentation files (the
6
+ "Software"), to deal in the Software without restriction, including
7
+ without limitation the rights to use, copy, modify, merge, publish,
8
+ distribute, sublicense, and/or sell copies of the Software, and to
9
+ permit persons to whom the Software is furnished to do so, subject to
10
+ the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be
13
+ included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,38 @@
1
+ # Roo Excel parser plugin for Embulk
2
+
3
+ Parses Microsoft Excel (xlsx) files read by file input plugins.
4
+
5
+ ## Overview
6
+
7
+ * **Plugin type**: parser
8
+ * **Guess supported**: no
9
+
10
+ ## Configuration
11
+
12
+ - **skip_header_lines**: Number of leading lines to skip. Set this to 1 if the file has a header line. (integer, default: 0)
13
+ - **sheet**: Name of the sheet to read (string, default: null, which selects the first sheet)
14
+
15
+ ## Example
16
+
17
+ ```yaml
18
+ in:
19
+ type: any file input plugin type
20
+ parser:
21
+ type: roo-excel
22
+ skip_header_lines: 1 # first row is header.
23
+ sheet: "beatles"
24
+
25
+ ```
26
+
27
+ Guess is not supported yet (see Overview). Once it is supported, you won't have to write the `parser:` section in the configuration file: after writing the `in:` section, you can let Embulk guess the `parser:` section using these commands:
28
+
29
+ ```
30
+ $ embulk gem install embulk-parser-roo-excel
31
+ $ embulk guess -g roo-excel config.yml -o guessed.yml
32
+ ```
33
+
34
+ ## Build
35
+
36
+ ```
37
+ $ rake
38
+ ```
@@ -0,0 +1,3 @@
1
# Load the gem packaging tasks shipped with Bundler.
require "bundler/gem_tasks"

# A bare `rake` invocation runs the :build task.
task :default => :build
@@ -0,0 +1,18 @@
1
# Gem specification for the Embulk parser plugin that reads Excel (xlsx)
# files through the roo library.
Gem::Specification.new do |spec|
  # Identity and descriptive metadata.
  spec.name        = "embulk-parser-roo-excel"
  spec.version     = "0.0.1"
  spec.summary     = "Roo Excel parser plugin for Embulk"
  spec.description = "Parses Excel files(xlsx) read by other file input plugins."
  spec.homepage    = "https://github.com/hiroyuki-sato/embulk-parser-roo-excel"
  spec.licenses    = ["MIT"]
  spec.authors     = ["Hiroyuki Sato"]
  spec.email       = ["hiroysato@gmail.com"]

  # Package every file tracked by git, plus any jars under classpath/.
  tracked_files      = `git ls-files`.split("\n")
  spec.files         = tracked_files + Dir["classpath/*.jar"]
  spec.test_files    = spec.files.grep(%r{^(test|spec)/})
  spec.require_paths = ["lib"]

  # Dependencies are declared in the same order as before so the generated
  # metadata does not change.
  spec.add_development_dependency 'bundler', '~> 1.0'
  spec.add_development_dependency 'rake', '>= 10.0'
  spec.add_dependency 'roo', '~> 2.0.1'
end
@@ -0,0 +1,63 @@
1
+ module Embulk
2
+ module Guess
3
+
4
+ # TODO implement guess plugin to make this command work:
5
+ # $ embulk guess -g "roo-excel" partial-config.yml
6
+ #
7
+ # Depending on the file format the plugin uses, you can choose
8
+ # one of binary guess (GuessPlugin), text guess (TextGuessPlugin),
9
+ # or line guess (LineGuessPlugin).
10
+
11
+ #require "embulk/parser/roo-excel.rb"
12
+
13
+ #class RooExcelParserGuessPlugin < GuessPlugin
14
+ # Plugin.register_guess("roo-excel", self)
15
+ #
16
+ # def guess(config, sample_buffer)
17
+ # if sample_buffer[0,2] == GZIP_HEADER
18
+ # guessed = {}
19
+ # guessed["type"] = "roo-excel"
20
+ # guessed["property1"] = "guessed-value"
21
+ # return {"parser" => guessed}
22
+ # else
23
+ # return {}
24
+ # end
25
+ # end
26
+ #end
27
+
28
+ #class RooExcelParserGuessPlugin < TextGuessPlugin
29
+ # Plugin.register_guess("roo-excel", self)
30
+ #
31
+ # def guess_text(config, sample_text)
32
+ # js = JSON.parse(sample_text) rescue nil
33
+ # if js && js["mykeyword"] == "keyword"
34
+ # guessed = {}
35
+ # guessed["type"] = "roo-excel"
36
+ # guessed["property1"] = "guessed-value"
37
+ # return {"parser" => guessed}
38
+ # else
39
+ # return {}
40
+ # end
41
+ # end
42
+ #end
43
+
44
+ #class RooExcelParserGuessPlugin < LineGuessPlugin
45
+ # Plugin.register_guess("roo-excel", self)
46
+ #
47
+ # def guess_lines(config, sample_lines)
48
+ # all_line_matched = sample_lines.all? do |line|
49
+ # line =~ /mypattern/
50
+ # end
51
+ # if all_line_matched
52
+ # guessed = {}
53
+ # guessed["type"] = "roo-excel"
54
+ # guessed["property1"] = "guessed-value"
55
+ # return {"parser" => guessed}
56
+ # else
57
+ # return {}
58
+ # end
59
+ # end
60
+ #end
61
+
62
+ end
63
+ end
@@ -0,0 +1,105 @@
1
require 'stringio'
require 'time'
require 'roo'
2
+ module Embulk
3
+ module Parser
4
+
5
# Embulk parser plugin that reads Excel (xlsx) workbooks through Roo and
# emits one record per worksheet row.
#
# Configuration keys (read in .transaction):
#   columns           - array of hashes with "name" and "type" keys (required)
#   sheet             - name of the worksheet to read (default: first sheet)
#   skip_header_lines - number of leading rows to skip (default: 0)
class RooExcelParserPlugin < ParserPlugin
  Plugin.register_parser("roo-excel", self)

  # Reads the plugin configuration, validates it and hands the task hash and
  # the output columns to the caller's block.
  #
  # Raises ArgumentError when skip_header_lines is negative.
  def self.transaction(config, &control)
    task = {
      "columns" => config.param("columns", :array),
      "sheet" => config.param("sheet", :string, default: nil),
      "skip_header_lines" => config.param("skip_header_lines", :integer, default: 0),
    }

    if task["skip_header_lines"] < 0
      raise ArgumentError, "skip_header_lines does not allow negative number"
    end

    columns = task["columns"].each_with_index.map do |column, index|
      Column.new(index, column["name"], column["type"].to_sym)
    end

    yield(task, columns)
  end

  # Copies the task settings into instance variables used by #run.
  def init
    @sheet = task["sheet"]
    @columns = task["columns"]
    # Row numbers handed to Roo start at 1 here, so the first data row is
    # the one right after the skipped header rows.
    @data_pos = task["skip_header_lines"] + 1
  end

  # Parses every file supplied by +file_input+ and then finishes the page
  # builder.
  #
  # An ArgumentError raised while handling a file is reported on stdout and
  # the remainder of that file is skipped (best effort); records already
  # added for that file are kept. Any other error propagates to the caller.
  def run(file_input)
    while (file = file_input.next_file)
      begin
        parse_workbook(file)
      rescue ArgumentError => e
        puts e
        puts e.backtrace
        puts "Can't open data file"
      end
    end
    page_builder.finish
  end

  # MEMO roo celltype
  # returns the type of a cell: * :float * :string, * :date * :percentage * :formula * :time * :datetime.
  #
  # Reads the cell at (+nrow+, +ncol+) from +xlsx+ and converts it according
  # to the "type" entry of +column+ ("string" when the entry is missing).
  # Unknown types are converted with #to_s.
  #
  # Blank cells keep the historical coercion for long/double/string
  # (0, 0.0, ""). A blank timestamp cell yields nil instead of raising, so a
  # single empty date cell no longer discards the rest of the file.
  def convert_cell(column, xlsx, nrow, ncol)
    value = xlsx.cell(nrow, ncol)
    case column["type"] || "string"
    when "long"
      value.to_i
    when "double"
      value.to_f
    when "timestamp"
      value.nil? ? nil : convert_time(value)
    else # "string" and every type not handled yet (TODO)
      value.to_s
    end
  end

  # Converts +t+ to a Time.
  #
  # Date and DateTime values go through #to_time, Time values are returned
  # unchanged and Strings are parsed with Time.parse.
  #
  # Raises ArgumentError for any other kind of value.
  def convert_time(t)
    case t
    when Time
      t
    when Date # also matches DateTime, which is a subclass of Date
      t.to_time
    when String
      Time.parse(t)
    else
      raise ArgumentError, "Can't convert time:#{t}"
    end
  end

  private

  # Opens one input file as an xlsx workbook and adds a record for every
  # row from the first data row through the sheet's last row.
  def parse_workbook(file)
    xlsx = Roo::Excelx.new(StringIO.new(file.read))
    xlsx.default_sheet = @sheet || xlsx.sheets.first

    last_row = xlsx.last_row
    # A sheet whose last row IS the first data row still holds one record,
    # so only skip when the last row lies before the data start.
    if last_row.nil? || last_row < @data_pos
      puts "No data. skip this file"
      return
    end

    @data_pos.upto(last_row) do |row|
      record = @columns.each_with_index.map do |column, index|
        convert_cell(column, xlsx, row, index + 1)
      end
      page_builder.add(record)
    end
  end
end
104
+ end
105
+ end
metadata ADDED
@@ -0,0 +1,95 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: embulk-parser-roo-excel
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Hiroyuki Sato
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-06-06 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: roo
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: 2.0.1
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: 2.0.1
55
+ description: Parses Excel files(xlsx) read by other file input plugins.
56
+ email:
57
+ - hiroysato@gmail.com
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - ".gitignore"
63
+ - Gemfile
64
+ - LICENSE.txt
65
+ - README.md
66
+ - Rakefile
67
+ - embulk-parser-roo-excel.gemspec
68
+ - lib/embulk/guess/roo-excel.rb
69
+ - lib/embulk/parser/roo-excel.rb
70
+ homepage: https://github.com/hiroyuki-sato/embulk-parser-roo-excel
71
+ licenses:
72
+ - MIT
73
+ metadata: {}
74
+ post_install_message:
75
+ rdoc_options: []
76
+ require_paths:
77
+ - lib
78
+ required_ruby_version: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ required_rubygems_version: !ruby/object:Gem::Requirement
84
+ requirements:
85
+ - - ">="
86
+ - !ruby/object:Gem::Version
87
+ version: '0'
88
+ requirements: []
89
+ rubyforge_project:
90
+ rubygems_version: 2.4.5
91
+ signing_key:
92
+ specification_version: 4
93
+ summary: Roo Excel parser plugin for Embulk
94
+ test_files: []
95
+ has_rdoc: