pg_csv 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,13 @@
1
+ .bundle/
2
+ log/*.log
3
+ pkg/
4
+ test/dummy/db/*.sqlite3
5
+ test/dummy/log/*.log
6
+ test/dummy/tmp/
7
+ test/dummy/.sass-cache
8
+ *.csv
9
+ *.gz
10
+ *.log
11
+ tmp/*
12
+ !tmp/.gitkeep
13
+ *.gem
data/Gemfile ADDED
@@ -0,0 +1,5 @@
1
+ source "http://rubygems.org"
2
+
3
+ gemspec
4
+
5
+ gem 'activerecord', :require => "active_record"
data/Gemfile.lock ADDED
@@ -0,0 +1,46 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ pg_csv (0.1)
5
+ activerecord
6
+ pg
7
+
8
+ GEM
9
+ remote: http://rubygems.org/
10
+ specs:
11
+ activemodel (3.2.3)
12
+ activesupport (= 3.2.3)
13
+ builder (~> 3.0.0)
14
+ activerecord (3.2.3)
15
+ activemodel (= 3.2.3)
16
+ activesupport (= 3.2.3)
17
+ arel (~> 3.0.2)
18
+ tzinfo (~> 0.3.29)
19
+ activesupport (3.2.3)
20
+ i18n (~> 0.6)
21
+ multi_json (~> 1.0)
22
+ arel (3.0.2)
23
+ builder (3.0.0)
24
+ diff-lcs (1.1.3)
25
+ i18n (0.6.0)
26
+ multi_json (1.3.6)
27
+ pg (0.13.2)
28
+ rake (0.9.2.2)
29
+ rspec (2.10.0)
30
+ rspec-core (~> 2.10.0)
31
+ rspec-expectations (~> 2.10.0)
32
+ rspec-mocks (~> 2.10.0)
33
+ rspec-core (2.10.1)
34
+ rspec-expectations (2.10.0)
35
+ diff-lcs (~> 1.1.3)
36
+ rspec-mocks (2.10.1)
37
+ tzinfo (0.3.33)
38
+
39
+ PLATFORMS
40
+ ruby
41
+
42
+ DEPENDENCIES
43
+ activerecord
44
+ pg_csv!
45
+ rake
46
+ rspec
data/MIT-LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright 2012 Makarchev K
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,57 @@
1
+ PgCsv
2
+ =====
3
+
4
+ Fast AR/PostgreSQL CSV export. Uses PostgreSQL's `COPY ... TO STDOUT WITH CSV`. Effective on millions of rows.
5
+
6
+ Gemfile:
7
+ ``` ruby
8
+ gem 'pg_csv'
9
+ ```
10
+
11
+ Usage:
12
+ ``` ruby
13
+ PgCsv.new(opts).export(to, opts)
14
+ ```
15
+
16
+ `to` is a filename or a stream (pass `nil` with `:type => :plain` to get the CSV back as a string).
17
+
18
+ Options:
19
+ ``` ruby
20
+ :sql => "select p.* from users u, projects p where p.user_id = u.id order by email limit 10"
21
+ :connection => AR.connection
22
+ :delimiter => field delimiter, e.g. "\t" or "|" (default: ",")
23
+ :header => boolean, emit PostgreSQL's own header row with the column names
24
+ :logger => logger
25
+ :columns => array of column names written as the header row; when given, :header is ignored
26
+
27
+ :temp_file => boolean, generate through a temp file; the final file appears via mv
28
+ :temp_dir => for :temp_file, ex: '/tmp'
29
+
30
+ :type => :plain - return full string
31
+ => :gzip - save file to gzip
32
+ => :stream - save to stream
33
+ => :file - just save to file (default)
34
+ ```
35
+
36
+ Examples:
37
+ ``` ruby
38
+ PgCsv.new(:sql => User.good.to_sql).export('a1.csv')
39
+ PgCsv.new(:sql => sql).export('a2.gz', :type => :gzip)
40
+ PgCsv.new(:sql => sql).export('a3.csv', :temp_file => true)
41
+ PgCsv.new(:sql => sql).export(nil, :type => :plain)
42
+ File.open("a4.csv", 'a'){|f| PgCsv.new(:sql => "select * from users").\
43
+ export(f, :type => :stream) }
44
+ PgCsv.new(:sql => sql).export('a5.csv', :delimiter => "\t")
45
+ PgCsv.new(:sql => sql).export('a6.csv', :header => true)
46
+ PgCsv.new(:sql => sql).export('a7.csv', :columns => %w{id a b c})
47
+ PgCsv.new(:sql => sql, :connection => SomeDb.connection, :columns => %w{id a b c}, :delimiter => "|").\
48
+ export('a8.gz', :type => :gzip, :temp_file => true)
49
+
50
+ # example: collect data from several shards into one gzip stream
51
+ Zlib::GzipWriter.open('some.gz') do |stream|
52
+ e = PgCsv.new(:sql => sql, :type => :stream)
53
+ ConnectionPool.each_shard do |connection|
54
+ e.export(stream, :connection => connection)
55
+ end
56
+ end
57
+ ```
data/Rakefile ADDED
@@ -0,0 +1,8 @@
1
+ require 'rubygems'
2
+ require 'bundler/setup'
3
+ require 'bundler'
4
+ Bundler::GemHelper.install_tasks
5
+
6
+ require 'rspec/core/rake_task'
7
+ task :default => :spec
8
+ RSpec::Core::RakeTask.new(:spec)
data/lib/pg_csv.rb ADDED
@@ -0,0 +1,182 @@
1
+ require 'active_record'
2
+
3
+ class PgCsv
4
+
5
+ # opts:
6
+ # :sql => "select u.*, p.* from users u, projects p where p.user_id = u.id order by email limit 100"
7
+ # :connection => AR.connection
8
+ # :delimiter => field delimiter, e.g. "\t" or "|" (default: ",")
9
+ # :header => boolean, emit PostgreSQL's own header row with the column names
10
+ # :logger => logger
11
+ # :columns => array of column names written as the header row; when given, :header is ignored
12
+
13
+ # :temp_file => boolean, generate through a temp file; the final file appears via mv
14
+ # :temp_dir => path, ex: /tmp
15
+
16
+ # :type => :plain - return full string
17
+ # => :gzip - save file to gzip
18
+ # => :stream - save to stream
19
+ # => :file - just save to file (default)
20
+
21
+ def initialize(opts = {})
22
+ @options = opts
23
+ end
24
+
25
+ # Run the export; `to` is a filename or a stream
26
+ def export(to, opts = {})
27
+ @local_options = opts
28
+
29
+ with_temp_file(to, o(:temp_file), o(:temp_dir)) do |_to|
30
+ export_to(_to)
31
+ end
32
+ end
33
+
34
+ protected
35
+
36
+ def with_temp_file(to, use_temp_file, tmp_dir)
37
+ if use_temp_file
38
+ check_to_str(to)
39
+
40
+ require 'fileutils'
41
+ require 'tempfile'
42
+
43
+ tempfile = Tempfile.new("pg_csv", tmp_dir || '/tmp')
44
+ yield(tempfile.path)
45
+ FileUtils.mv(tempfile.path, to)
46
+ info "<=== moving export to #{to}"
47
+ else
48
+ yield(to)
49
+ end
50
+ end
51
+
52
+ def export_to(to)
53
+ exporter = method(:export_to_stream).to_proc
54
+
55
+ start = Time.now
56
+ info "===> start generate export #{to}, type: #{type}"
57
+
58
+ result = nil
59
+
60
+ case type
61
+
62
+ when :file
63
+ check_to_str(to)
64
+ File.open(to, 'w', &exporter)
65
+
66
+ when :gzip
67
+ check_to_str(to)
68
+ Zlib::GzipWriter.open(to, &exporter)
69
+
70
+ when :stream
71
+ exporter[to]
72
+
73
+ when :plain
74
+ require 'stringio'
75
+ sio = StringIO.new
76
+ exporter[sio]
77
+ result = sio.string
78
+
79
+ end
80
+
81
+ info "<=== finished write #{to} in #{Time.now - start}"
82
+
83
+ result
84
+ end
85
+
86
+ def check_to_str(to)
87
+ raise "to should be an string" unless to.is_a?(String)
88
+ end
89
+
90
+ def export_to_stream(stream)
91
+ write_csv(stream)
92
+ stream.flush
93
+ end
94
+
95
+ def write_csv(stream)
96
+ count = 0
97
+
98
+ load_data do |row|
99
+ count += 1
100
+ stream.write prepare_row(row)
101
+ end
102
+
103
+ info "<= done exporting (#{count}) records."
104
+ count
105
+ end
106
+
107
+ def load_data
108
+ info "#{query}"
109
+ conn = connection.raw_connection
110
+
111
+ info "=> query"
112
+ q = conn.exec(query)
113
+ info "<= query"
114
+
115
+ info "=> write data"
116
+ yield(columns_str) if columns_str
117
+
118
+ while row = conn.get_copy_data()
119
+ yield row
120
+ end
121
+ info "<= write data"
122
+
123
+ q.clear
124
+ end
125
+
126
+ def query
127
+ <<-SQL
128
+ COPY (
129
+ #{o(:sql)}
130
+ ) TO STDOUT
131
+ WITH CSV
132
+ DELIMITER '#{delimiter}'
133
+ #{use_pg_header? ? 'HEADER' : ''}
134
+ SQL
135
+ end
136
+
137
+ def prepare_row(row)
138
+ row
139
+ end
140
+
141
+ def info(message)
142
+ logger.info(message) if logger
143
+ end
144
+
145
+ # ==== options/defaults =============
146
+
147
+ def o(key)
148
+ @local_options[key] || @options[key]
149
+ end
150
+
151
+ def connection
152
+ o(:connection) || (defined?(ActiveRecord::Base) ? ActiveRecord::Base.connection : nil)
153
+ end
154
+
155
+ def logger
156
+ o(:logger)
157
+ end
158
+
159
+ def type
160
+ o(:type) || :file
161
+ end
162
+
163
+ def use_pg_header?
164
+ o(:header) && !o(:columns)
165
+ end
166
+
167
+ def columns_str
168
+ if o(:columns)
169
+ col = o(:columns)
170
+ if col.is_a?(Array)
171
+ col.join(delimiter) + "\n"
172
+ else
173
+ col + "\n"
174
+ end
175
+ end
176
+ end
177
+
178
+ def delimiter
179
+ o(:delimiter) || ','
180
+ end
181
+
182
+ end
@@ -0,0 +1,3 @@
1
+ class PgCsv
2
+ VERSION = "0.1"
3
+ end
data/pg_csv.gemspec ADDED
@@ -0,0 +1,26 @@
1
+ $:.push File.expand_path("../lib", __FILE__)
2
+
3
+ # Maintain your gem's version:
4
+ require "pg_csv_version"
5
+
6
+ # Describe your gem and declare its dependencies:
7
+ Gem::Specification.new do |s|
8
+ s.name = "pg_csv"
9
+ s.version = PgCsv::VERSION
10
+ s.authors = ["Makarchev Konstantin"]
11
+ s.email = ["kostya27@gmail.com"]
12
+ s.homepage = "http://github.com/kostya/pg_csv"
13
+ s.summary = "Fast AR/PostgreSQL csv export. Used pg function 'copy to csv'. Effective on millions rows."
14
+ s.description = "Fast AR/PostgreSQL csv export. Used pg function 'copy to csv'. Effective on millions rows."
15
+
16
+ s.files = `git ls-files`.split("\n")
17
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
18
+ s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
19
+ s.require_paths = ["lib"]
20
+
21
+ s.add_dependency "pg"
22
+ s.add_dependency "activerecord"
23
+ s.add_development_dependency "rspec"
24
+ s.add_development_dependency "rake"
25
+
26
+ end
@@ -0,0 +1,124 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+
3
+ describe PgCsv do
4
+
5
+ before :each do
6
+ Test.delete_all
7
+ Test.create :a => 1, :b => 2, :c => 3
8
+ Test.create :a => 4, :b => 5, :c => 6
9
+
10
+ @name = tmp_dir + "1.csv"
11
+ @gzname = tmp_dir + "1.gz"
12
+
13
+ FileUtils.rm(@name) rescue nil
14
+ FileUtils.rm(@gzname) rescue nil
15
+
16
+ @sql0 = "select a,b,c from tests order by a asc"
17
+ @sql = "select a,b,c from tests order by a desc"
18
+ end
19
+
20
+ after :each do
21
+ FileUtils.rm(@name) rescue nil
22
+ FileUtils.rm(@gzname) rescue nil
23
+ end
24
+
25
+
26
+ describe "simple export" do
27
+
28
+ it "1" do
29
+ PgCsv.new(:sql => @sql0).export(@name)
30
+ with_file(@name){|d| d.should == "1,2,3\n4,5,6\n" }
31
+ end
32
+
33
+ it "2" do
34
+ PgCsv.new(:sql => @sql).export(@name)
35
+ with_file(@name){|d| d.should == "4,5,6\n1,2,3\n" }
36
+ end
37
+
38
+ it "delimiter" do
39
+ PgCsv.new(:sql => @sql).export(@name, :delimiter => "|")
40
+ with_file(@name){|d| d.should == "4|5|6\n1|2|3\n" }
41
+ end
42
+
43
+
44
+ describe "headers" do
45
+ it "header" do
46
+ PgCsv.new(:sql => @sql).export(@name, :header => true)
47
+ with_file(@name){|d| d.should == "a,b,c\n4,5,6\n1,2,3\n" }
48
+ end
49
+
50
+ it "columns" do
51
+ PgCsv.new(:sql => @sql).export(@name, :columns => %w(q w e))
52
+ with_file(@name){|d| d.should == "q,w,e\n4,5,6\n1,2,3\n" }
53
+ end
54
+
55
+ it "columns with header" do
56
+ PgCsv.new(:sql => @sql).export(@name, :header => true, :columns => %w(q w e))
57
+
58
+ with_file(@name) do |d|
59
+ d.should == "q,w,e\n4,5,6\n1,2,3\n"
60
+ end
61
+ end
62
+ end
63
+
64
+ end
65
+
66
+ describe "moving options no matter" do
67
+ it "1" do
68
+ PgCsv.new(:sql => @sql).export(@name, :delimiter => "|")
69
+ with_file(@name){|d| d.should == "4|5|6\n1|2|3\n" }
70
+ end
71
+
72
+ it "2" do
73
+ PgCsv.new(:delimiter => "|").export(@name, :sql => @sql)
74
+ with_file(@name){|d| d.should == "4|5|6\n1|2|3\n"}
75
+ end
76
+ end
77
+
78
+ describe "local options dont recover global" do
79
+ it "test" do
80
+ e = PgCsv.new(:sql => @sql, :delimiter => "*")
81
+ e.export(@name, :delimiter => "|")
82
+ with_file(@name){|d| d.should == "4|5|6\n1|2|3\n" }
83
+
84
+ e.export(@name)
85
+ with_file(@name){|d| d.should == "4*5*6\n1*2*3\n" }
86
+ end
87
+ end
88
+
89
+ describe "using temp file" do
90
+ it "at least file should return to target" do
91
+ File.exists?(@name).should be_false
92
+ PgCsv.new(:sql => @sql, :temp_file => true, :temp_dir => tmp_dir).export(@name)
93
+ with_file(@name){|d| d.should == "4,5,6\n1,2,3\n" }
94
+ end
95
+ end
96
+
97
+ describe "different types of export" do
98
+ it "gzip export" do
99
+ File.exists?(@gzname).should be_false
100
+ PgCsv.new(:sql => @sql, :type => :gzip).export(@gzname)
101
+ with_gzfile(@gzname){|d| d.should == "4,5,6\n1,2,3\n" }
102
+ end
103
+
104
+ it "plain export" do
105
+ PgCsv.new(:sql => @sql, :type => :plain).export(nil).should == "4,5,6\n1,2,3\n"
106
+ end
107
+
108
+ it "custom stream" do
109
+ ex = PgCsv.new(:sql => @sql, :type => :stream)
110
+ File.open(@name, 'w') do |stream|
111
+ ex.export(stream)
112
+ ex.export(stream, :sql => @sql0)
113
+ end
114
+
115
+ with_file(@name){|d| d.should == "4,5,6\n1,2,3\n1,2,3\n4,5,6\n" }
116
+ end
117
+
118
+ it "file as default" do
119
+ PgCsv.new(:sql => @sql, :type => :file).export(@name)
120
+ with_file(@name){|d| d.should == "4,5,6\n1,2,3\n" }
121
+ end
122
+ end
123
+
124
+ end
@@ -0,0 +1,9 @@
1
+ require 'rubygems'
2
+ require "bundler"
3
+ Bundler.setup
4
+ ENV['RAILS_ENV'] ||= 'test'
5
+
6
+ $:.unshift(File.dirname(__FILE__) + '/../lib')
7
+ require 'pg_csv'
8
+
9
+ require File.dirname(__FILE__) + '/spec_support.rb'
@@ -0,0 +1,50 @@
1
+ require 'fileutils'
2
+
3
+ conn = {'adapter' => 'postgresql', 'database' => 'pgcsv_test', 'encoding' => 'utf8', 'username' => 'kostya', 'password' => 'password'}
4
+ ActiveRecord::Base.establish_connection conn
5
+
6
+ class Test < ActiveRecord::Base
7
+ self.table_name = 'tests'
8
+ end
9
+
10
+ def pg_create_schema
11
+ ActiveRecord::Migration.create_table :tests do |t|
12
+ t.integer :a
13
+ t.integer :b
14
+ t.integer :c
15
+ end
16
+ end
17
+
18
+ def pg_drop_data
19
+ ActiveRecord::Migration.drop_table :tests
20
+ end
21
+
22
+ pg_drop_data rescue nil
23
+ pg_create_schema
24
+
25
+ def tmp_dir
26
+ File.dirname(__FILE__) + "/tmp/"
27
+ end
28
+
29
+ def with_file(name)
30
+ File.exists?(name).should be_true
31
+ q = 1
32
+ File.open(name) do |file|
33
+ data = file.read
34
+ yield data
35
+ q = 2
36
+ end
37
+
38
+ q.should == 2
39
+ end
40
+
41
+ def with_gzfile(name)
42
+ File.exist?(name).should be_true
43
+ q = 1
44
+ Zlib::GzipReader.open(name) do |gz|
45
+ data = gz.read
46
+ yield data
47
+ q = 2
48
+ end
49
+ q.should == 2
50
+ end
data/spec/tmp/.gitkeep ADDED
File without changes
metadata ADDED
@@ -0,0 +1,134 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pg_csv
3
+ version: !ruby/object:Gem::Version
4
+ hash: 9
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 1
9
+ version: "0.1"
10
+ platform: ruby
11
+ authors:
12
+ - Makarchev Konstantin
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2012-05-31 00:00:00 +04:00
18
+ default_executable:
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: pg
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ hash: 3
29
+ segments:
30
+ - 0
31
+ version: "0"
32
+ type: :runtime
33
+ version_requirements: *id001
34
+ - !ruby/object:Gem::Dependency
35
+ name: activerecord
36
+ prerelease: false
37
+ requirement: &id002 !ruby/object:Gem::Requirement
38
+ none: false
39
+ requirements:
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ hash: 3
43
+ segments:
44
+ - 0
45
+ version: "0"
46
+ type: :runtime
47
+ version_requirements: *id002
48
+ - !ruby/object:Gem::Dependency
49
+ name: rspec
50
+ prerelease: false
51
+ requirement: &id003 !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ hash: 3
57
+ segments:
58
+ - 0
59
+ version: "0"
60
+ type: :development
61
+ version_requirements: *id003
62
+ - !ruby/object:Gem::Dependency
63
+ name: rake
64
+ prerelease: false
65
+ requirement: &id004 !ruby/object:Gem::Requirement
66
+ none: false
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ hash: 3
71
+ segments:
72
+ - 0
73
+ version: "0"
74
+ type: :development
75
+ version_requirements: *id004
76
+ description: Fast AR/PostgreSQL csv export. Used pg function 'copy to csv'. Effective on millions rows.
77
+ email:
78
+ - kostya27@gmail.com
79
+ executables: []
80
+
81
+ extensions: []
82
+
83
+ extra_rdoc_files: []
84
+
85
+ files:
86
+ - .gitignore
87
+ - Gemfile
88
+ - Gemfile.lock
89
+ - MIT-LICENSE
90
+ - README.md
91
+ - Rakefile
92
+ - lib/pg_csv.rb
93
+ - lib/pg_csv_version.rb
94
+ - pg_csv.gemspec
95
+ - spec/pg_csv_spec.rb
96
+ - spec/spec_helper.rb
97
+ - spec/spec_support.rb
98
+ - spec/tmp/.gitkeep
99
+ has_rdoc: true
100
+ homepage: http://github.com/kostya/pg_csv
101
+ licenses: []
102
+
103
+ post_install_message:
104
+ rdoc_options: []
105
+
106
+ require_paths:
107
+ - lib
108
+ required_ruby_version: !ruby/object:Gem::Requirement
109
+ none: false
110
+ requirements:
111
+ - - ">="
112
+ - !ruby/object:Gem::Version
113
+ hash: 3
114
+ segments:
115
+ - 0
116
+ version: "0"
117
+ required_rubygems_version: !ruby/object:Gem::Requirement
118
+ none: false
119
+ requirements:
120
+ - - ">="
121
+ - !ruby/object:Gem::Version
122
+ hash: 3
123
+ segments:
124
+ - 0
125
+ version: "0"
126
+ requirements: []
127
+
128
+ rubyforge_project:
129
+ rubygems_version: 1.3.7
130
+ signing_key:
131
+ specification_version: 3
132
+ summary: Fast AR/PostgreSQL csv export. Used pg function 'copy to csv'. Effective on millions rows.
133
+ test_files: []
134
+