pg_csv 0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,13 @@
1
+ .bundle/
2
+ log/*.log
3
+ pkg/
4
+ test/dummy/db/*.sqlite3
5
+ test/dummy/log/*.log
6
+ test/dummy/tmp/
7
+ test/dummy/.sass-cache
8
+ *.csv
9
+ *.gz
10
+ *.log
11
+ tmp/*
12
+ !tmp/.gitkeep
13
+ *.gem
data/Gemfile ADDED
@@ -0,0 +1,5 @@
1
# Fetch gems over HTTPS so downloads cannot be tampered with in transit.
source "https://rubygems.org"

# Runtime and development dependencies are declared in pg_csv.gemspec.
gemspec

gem 'activerecord', :require => "active_record"
data/Gemfile.lock ADDED
@@ -0,0 +1,46 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ pg_csv (0.1)
5
+ activerecord
6
+ pg
7
+
8
+ GEM
9
+ remote: http://rubygems.org/
10
+ specs:
11
+ activemodel (3.2.3)
12
+ activesupport (= 3.2.3)
13
+ builder (~> 3.0.0)
14
+ activerecord (3.2.3)
15
+ activemodel (= 3.2.3)
16
+ activesupport (= 3.2.3)
17
+ arel (~> 3.0.2)
18
+ tzinfo (~> 0.3.29)
19
+ activesupport (3.2.3)
20
+ i18n (~> 0.6)
21
+ multi_json (~> 1.0)
22
+ arel (3.0.2)
23
+ builder (3.0.0)
24
+ diff-lcs (1.1.3)
25
+ i18n (0.6.0)
26
+ multi_json (1.3.6)
27
+ pg (0.13.2)
28
+ rake (0.9.2.2)
29
+ rspec (2.10.0)
30
+ rspec-core (~> 2.10.0)
31
+ rspec-expectations (~> 2.10.0)
32
+ rspec-mocks (~> 2.10.0)
33
+ rspec-core (2.10.1)
34
+ rspec-expectations (2.10.0)
35
+ diff-lcs (~> 1.1.3)
36
+ rspec-mocks (2.10.1)
37
+ tzinfo (0.3.33)
38
+
39
+ PLATFORMS
40
+ ruby
41
+
42
+ DEPENDENCIES
43
+ activerecord
44
+ pg_csv!
45
+ rake
46
+ rspec
data/MIT-LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright 2012 Makarchev K
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,57 @@
1
+ PgCsv
2
+ =====
3
+
4
+ Fast AR/PostgreSQL csv export. Uses pg function 'copy to csv'. Effective on millions of rows.
5
+
6
+ Gemfile:
7
+ ``` ruby
8
+ gem 'pg_csv'
9
+ ```
10
+
11
+ Usage:
12
+ ``` ruby
13
+ PgCsv.new(opts).export(to, opts)
14
+ ```
15
+
16
+ 'to' is a stream or filename
17
+
18
+ Options:
19
+ ``` ruby
20
+ :sql => "select p.* from users u, projects p where p.user_id = u.id order by email limit 10"
21
+ :connection => AR.connection
22
+ :delimiter => ["\t", ",", ]
23
+ :header => boolean, use pg header for fields?
24
+ :logger => logger
25
+ :columns => manual array of column names, ignore :header option
26
+
27
+ :temp_file => boolean, generate through temp file, final file appears by mv
28
+ :temp_dir => for :temp_file, ex: '/tmp'
29
+
30
+ :type => :plain - return full string
31
+ => :gzip - save file to gzip
32
+ => :stream - save to stream
33
+ => :file - just save to file = default
34
+ ```
35
+
36
+ Examples:
37
+ ``` ruby
38
+ PgCsv.new(:sql => User.good.to_sql).export('a1.csv')
39
+ PgCsv.new(:sql => sql).export('a2.gz', :type => :gzip)
40
+ PgCsv.new(:sql => sql).export('a3.csv', :temp_file => true)
41
+ PgCsv.new(:sql => sql).export(nil, :type => :plain)
42
+ File.open("a4.csv", 'a'){|f| PgCsv.new(:sql => "select * from users").\
43
+ export(f, :type => :stream) }
44
+ PgCsv.new(:sql => sql).export('a5.csv', :delimiter => "\t")
45
+ PgCsv.new(:sql => sql).export('a6.csv', :header => true)
46
+ PgCsv.new(:sql => sql).export('a7.csv', :columns => %w{id a b c})
47
+ PgCsv.new(:sql => sql, :connection => SomeDb.connection, :columns => %w{id a b c}, :delimiter => "|").\
48
+ export('a8.gz', :type => :gzip, :temp_file => true)
49
+
50
+ # example collect from shards
51
+ Zlib::GzipWriter.open('some.gz') do |stream|
52
+ e = PgCsv.new(:sql => sql, :type => :stream)
53
+ ConnectionPool.each_shard do |connection|
54
+ e.export(stream, :connection => connection)
55
+ end
56
+ end
57
+ ```
data/Rakefile ADDED
@@ -0,0 +1,8 @@
1
# Load Bundler and the bundle's load paths before anything else.
require 'rubygems'
require 'bundler/setup'
require 'bundler'

# Install Bundler's gem helper tasks.
Bundler::GemHelper.install_tasks

# Define the :spec task and make it the default, so a bare `rake` runs it.
require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new(:spec)
task :default => :spec
data/lib/pg_csv.rb ADDED
@@ -0,0 +1,182 @@
1
+ require 'active_record'
2
+
3
# Exports the result of an SQL query as CSV by wrapping it in
# PostgreSQL's `COPY (...) TO STDOUT WITH CSV` and streaming the rows
# from the raw connection to a file, gzip file, IO stream or string.
class PgCsv

  # Options may be given to #initialize and/or #export; a non-nil value
  # passed to #export wins over the one passed to #initialize.
  #
  # opts:
  #   :sql => "select u.*, p.* from users u, projects p where p.user_id = u.id order by email limit 100"
  #   :connection => AR.connection
  #   :delimiter => ["\t", ",", ]
  #   :header => boolean, use pg header for fields?
  #   :logger => logger
  #   :columns => manual array of column names, ignore :header option
  #
  #   :temp_file => boolean, generating through temp file, final file appears by mv
  #   :temp_dir => path, ex: /tmp (defaults to Dir.tmpdir)
  #
  #   :type => :plain - return full string
  #         => :gzip - save file to gzip
  #         => :stream - save to stream
  #         => :file - just save to file * default

  def initialize(opts = {})
    @options = opts || {}
    @local_options = {}
  end

  # Runs the export.
  #
  # to   - filename (for :file / :gzip), an object responding to
  #        #write and #flush (for :stream), or ignored (for :plain).
  # opts - per-call options, see the list above.
  #
  # Returns the CSV text for :type => :plain, nil for the other types.
  # Raises ArgumentError for an unknown :type, RuntimeError when a
  # filename is required but `to` is not a String.
  def export(to, opts = {})
    @local_options = opts || {}

    with_temp_file(to, o(:temp_file), o(:temp_dir)) do |dest|
      export_to(dest)
    end
  end

  protected

  # Yields the path the export should be written to and returns the
  # block's result. With use_temp_file the block writes to a temporary
  # file that is moved to `to` only after the block succeeded.
  def with_temp_file(to, use_temp_file, tmp_dir)
    return yield(to) unless use_temp_file

    check_to_str(to)

    require 'fileutils'
    require 'tempfile'

    tempfile = Tempfile.new("pg_csv", tmp_dir || Dir.tmpdir)
    begin
      # Only the reserved path is needed; the exporter reopens it by
      # name, so release our own handle first.
      tempfile.close
      result = yield(tempfile.path)
      FileUtils.mv(tempfile.path, to)
      info "<=== moving export to #{to}"
      result
    ensure
      # Removes the temp file if the export failed; after a successful
      # move the path no longer exists and this is a no-op.
      tempfile.close!
    end
  end

  # Dispatches on the export type and writes the CSV to `to`.
  # Returns the CSV string for :plain, nil otherwise.
  def export_to(to)
    exporter = method(:export_to_stream).to_proc

    start = Time.now
    info "===> start generate export #{to}, type: #{type}"

    result = nil

    case type

    when :file
      check_to_str(to)
      File.open(to, 'w', &exporter)

    when :gzip
      check_to_str(to)
      require 'zlib'
      Zlib::GzipWriter.open(to, &exporter)

    when :stream
      exporter[to]

    when :plain
      require 'stringio'
      sio = StringIO.new
      exporter[sio]
      result = sio.string

    else
      # Fail loudly instead of silently exporting nothing.
      raise ArgumentError, "unknown export type: #{type.inspect}"

    end

    info "<=== finished write #{to} in #{Time.now - start}"

    result
  end

  # Guards the types that need a filename.
  def check_to_str(to)
    raise "to should be an string" unless to.is_a?(String)
  end

  # Writes the whole CSV to the stream and flushes it.
  def export_to_stream(stream)
    write_csv(stream)
    stream.flush
  end

  # Writes every chunk produced by #load_data to the stream.
  # Returns the number of chunks written (the manual :columns header,
  # when present, is counted too).
  def write_csv(stream)
    count = 0

    load_data do |row|
      count += 1
      stream.write prepare_row(row)
    end

    info "<= done exporting (#{count}) records."
    count
  end

  # Executes the COPY query on the raw connection and yields the
  # optional manual header followed by every chunk of copy data.
  def load_data
    sql = query
    info "#{sql}"
    conn = connection.raw_connection

    info "=> query"
    q = conn.exec(sql)
    info "<= query"

    info "=> write data"
    header = columns_str
    yield(header) if header

    while row = conn.get_copy_data()
      yield row
    end
    info "<= write data"
  ensure
    # Release the result even when writing a row raised.
    q.clear if q
  end

  # Builds the COPY statement around the user supplied :sql.
  # The :sql option is inserted verbatim; the delimiter is placed in a
  # single-quoted SQL literal, so embedded single quotes are doubled.
  def query
    <<-SQL
      COPY (
        #{o(:sql)}
      ) TO STDOUT
      WITH CSV
      DELIMITER '#{delimiter.to_s.gsub("'", "''")}'
      #{use_pg_header? ? 'HEADER' : ''}
    SQL
  end

  # Hook for subclasses: transform a chunk before it is written.
  def prepare_row(row)
    row
  end

  def info(message)
    logger.info(message) if logger
  end

  # ==== options/defaults =============

  # Looks an option up in the per-export options first, falling back to
  # the constructor options only when the per-export value is nil, so an
  # explicit `false` passed to #export is honoured.
  def o(key)
    local = (@local_options || {})[key]
    local.nil? ? @options[key] : local
  end

  def connection
    o(:connection) || (defined?(ActiveRecord::Base) ? ActiveRecord::Base.connection : nil)
  end

  def logger
    o(:logger)
  end

  def type
    o(:type) || :file
  end

  # The pg generated header is only used when no manual :columns given.
  def use_pg_header?
    o(:header) && !o(:columns)
  end

  # Manual header line built from :columns, or nil when not requested.
  # Names are joined verbatim (no CSV quoting is applied).
  def columns_str
    col = o(:columns)
    return unless col

    if col.is_a?(Array)
      col.join(delimiter) + "\n"
    else
      col + "\n"
    end
  end

  def delimiter
    o(:delimiter) || ','
  end

end
@@ -0,0 +1,3 @@
1
class PgCsv
  # Gem version, read by pg_csv.gemspec.
  VERSION = '0.1'
end
data/pg_csv.gemspec ADDED
@@ -0,0 +1,26 @@
1
$:.push File.expand_path("../lib", __FILE__)

# Maintain your gem's version:
require "pg_csv_version"

# Describe your gem and declare its dependencies:
Gem::Specification.new do |s|
  s.name = "pg_csv"
  s.version = PgCsv::VERSION
  s.authors = ["Makarchev Konstantin"]
  s.email = ["kostya27@gmail.com"]
  s.homepage = "http://github.com/kostya/pg_csv"
  s.summary = "Fast AR/PostgreSQL csv export. Used pg function 'copy to csv'. Effective on millions rows."
  s.description = "Fast AR/PostgreSQL csv export. Used pg function 'copy to csv'. Effective on millions rows."
  # The gem ships MIT-LICENSE; declare it so the packaged metadata is not empty.
  s.licenses = ["MIT"]

  # Package exactly what git tracks; requires building from a git checkout.
  s.files = `git ls-files`.split("\n")
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

  s.add_dependency "pg"
  s.add_dependency "activerecord"
  s.add_development_dependency "rspec"
  s.add_development_dependency "rake"

end
@@ -0,0 +1,124 @@
1
require File.dirname(__FILE__) + '/spec_helper'

# Exercises PgCsv against the "tests" table created in spec_support.
describe PgCsv do

  before :each do
    Test.delete_all
    Test.create :a => 1, :b => 2, :c => 3
    Test.create :a => 4, :b => 5, :c => 6

    @name = tmp_dir + "1.csv"
    @gzname = tmp_dir + "1.gz"

    # rm_f ignores files that are already gone, no blanket rescue needed.
    FileUtils.rm_f(@name)
    FileUtils.rm_f(@gzname)

    @sql0 = "select a,b,c from tests order by a asc"
    @sql = "select a,b,c from tests order by a desc"
  end

  after :each do
    FileUtils.rm_f(@name)
    FileUtils.rm_f(@gzname)
  end


  describe "simple export" do

    it "1" do
      PgCsv.new(:sql => @sql0).export(@name)
      with_file(@name){|d| d.should == "1,2,3\n4,5,6\n" }
    end

    it "2" do
      PgCsv.new(:sql => @sql).export(@name)
      with_file(@name){|d| d.should == "4,5,6\n1,2,3\n" }
    end

    it "delimiter" do
      PgCsv.new(:sql => @sql).export(@name, :delimiter => "|")
      with_file(@name){|d| d.should == "4|5|6\n1|2|3\n" }
    end


    describe "headers" do
      it "header" do
        PgCsv.new(:sql => @sql).export(@name, :header => true)
        with_file(@name){|d| d.should == "a,b,c\n4,5,6\n1,2,3\n" }
      end

      it "columns" do
        PgCsv.new(:sql => @sql).export(@name, :columns => %w(q w e))
        with_file(@name){|d| d.should == "q,w,e\n4,5,6\n1,2,3\n" }
      end

      # Manual :columns take precedence over the pg generated header.
      it "columns with header" do
        PgCsv.new(:sql => @sql).export(@name, :header => true, :columns => %w(q w e))

        with_file(@name) do |d|
          d.should == "q,w,e\n4,5,6\n1,2,3\n"
        end
      end
    end

  end

  # Options behave the same whether given to .new or to #export.
  describe "moving options no matter" do
    it "1" do
      PgCsv.new(:sql => @sql).export(@name, :delimiter => "|")
      with_file(@name){|d| d.should == "4|5|6\n1|2|3\n" }
    end

    it "2" do
      PgCsv.new(:delimiter => "|").export(@name, :sql => @sql)
      with_file(@name){|d| d.should == "4|5|6\n1|2|3\n"}
    end
  end

  # Per-export options must not leak into later exports.
  describe "local options dont recover global" do
    it "test" do
      e = PgCsv.new(:sql => @sql, :delimiter => "*")
      e.export(@name, :delimiter => "|")
      with_file(@name){|d| d.should == "4|5|6\n1|2|3\n" }

      e.export(@name)
      with_file(@name){|d| d.should == "4*5*6\n1*2*3\n" }
    end
  end

  describe "using temp file" do
    it "at least file should return to target" do
      File.exist?(@name).should be_false
      PgCsv.new(:sql => @sql, :temp_file => true, :temp_dir => tmp_dir).export(@name)
      with_file(@name){|d| d.should == "4,5,6\n1,2,3\n" }
    end
  end

  describe "different types of export" do
    it "gzip export" do
      File.exist?(@gzname).should be_false
      PgCsv.new(:sql => @sql, :type => :gzip).export(@gzname)
      with_gzfile(@gzname){|d| d.should == "4,5,6\n1,2,3\n" }
    end

    it "plain export" do
      PgCsv.new(:sql => @sql, :type => :plain).export(nil).should == "4,5,6\n1,2,3\n"
    end

    # The same exporter can append several exports to one open stream.
    it "custom stream" do
      ex = PgCsv.new(:sql => @sql, :type => :stream)
      File.open(@name, 'w') do |stream|
        ex.export(stream)
        ex.export(stream, :sql => @sql0)
      end

      with_file(@name){|d| d.should == "4,5,6\n1,2,3\n1,2,3\n4,5,6\n" }
    end

    it "file as default" do
      PgCsv.new(:sql => @sql, :type => :file).export(@name)
      with_file(@name){|d| d.should == "4,5,6\n1,2,3\n" }
    end
  end

end
@@ -0,0 +1,9 @@
1
# Boot Bundler so the bundle's gems are on the load path.
require 'rubygems'
require "bundler"
Bundler.setup

# Default to the test environment unless the caller already chose one.
ENV['RAILS_ENV'] = 'test' unless ENV['RAILS_ENV']

# Make the library under test loadable without installing the gem.
spec_root = File.dirname(__FILE__)
$:.unshift(spec_root + '/../lib')
require 'pg_csv'

# Database setup and file helpers shared by the specs.
require spec_root + '/spec_support.rb'
@@ -0,0 +1,50 @@
1
require 'fileutils'
# with_gzfile below reads archives through Zlib::GzipReader.
require 'zlib'

# Connection settings for the test database. The defaults match the
# original hard-coded values; override them through the environment to
# run the suite against a differently configured PostgreSQL.
conn = {
  'adapter'  => 'postgresql',
  'database' => ENV['PGCSV_TEST_DB'] || 'pgcsv_test',
  'encoding' => 'utf8',
  'username' => ENV['PGCSV_TEST_USER'] || 'kostya',
  'password' => ENV['PGCSV_TEST_PASSWORD'] || 'password'
}
ActiveRecord::Base.establish_connection conn
5
+
6
# ActiveRecord model the specs use to seed rows in the "tests" table.
class Test < ActiveRecord::Base
  self.table_name = "tests"
end
9
+
10
# Creates the "tests" table with three integer columns: a, b and c.
def pg_create_schema
  ActiveRecord::Migration.create_table(:tests) do |table|
    table.integer :a
    table.integer :b
    table.integer :c
  end
end
17
+
18
# Drops the "tests" table.
def pg_drop_data
  ActiveRecord::Migration.drop_table(:tests)
end
21
+
22
# Start from a fresh table; any error from the drop is ignored.
begin
  pg_drop_data
rescue StandardError
  nil
end
pg_create_schema
24
+
25
# Directory (with trailing slash) where the specs write export files.
def tmp_dir
  "#{File.dirname(__FILE__)}/tmp/"
end
28
+
29
# Asserts that the file exists, yields its whole content to the block
# and asserts that the block ran to completion.
def with_file(name)
  # File.exist? matches with_gzfile; File.exists? is gone in Ruby 3.2+.
  File.exist?(name).should be_true
  q = 1
  File.open(name) do |file|
    data = file.read
    yield data
    q = 2
  end

  # q only becomes 2 once the block above has run to the end.
  q.should == 2
end
40
+
41
# Gzip counterpart of with_file: asserts that the archive exists, yields
# its decompressed content and asserts that the block ran to completion.
def with_gzfile(name)
  File.exist?(name).should be_true

  stage = 1
  Zlib::GzipReader.open(name) do |archive|
    yield archive.read
    stage = 2
  end

  # stage only becomes 2 once the block above has run to the end.
  stage.should == 2
end
data/spec/tmp/.gitkeep ADDED
File without changes
metadata ADDED
@@ -0,0 +1,134 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pg_csv
3
+ version: !ruby/object:Gem::Version
4
+ hash: 9
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 1
9
+ version: "0.1"
10
+ platform: ruby
11
+ authors:
12
+ - Makarchev Konstantin
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2012-05-31 00:00:00 +04:00
18
+ default_executable:
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: pg
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ hash: 3
29
+ segments:
30
+ - 0
31
+ version: "0"
32
+ type: :runtime
33
+ version_requirements: *id001
34
+ - !ruby/object:Gem::Dependency
35
+ name: activerecord
36
+ prerelease: false
37
+ requirement: &id002 !ruby/object:Gem::Requirement
38
+ none: false
39
+ requirements:
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ hash: 3
43
+ segments:
44
+ - 0
45
+ version: "0"
46
+ type: :runtime
47
+ version_requirements: *id002
48
+ - !ruby/object:Gem::Dependency
49
+ name: rspec
50
+ prerelease: false
51
+ requirement: &id003 !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ hash: 3
57
+ segments:
58
+ - 0
59
+ version: "0"
60
+ type: :development
61
+ version_requirements: *id003
62
+ - !ruby/object:Gem::Dependency
63
+ name: rake
64
+ prerelease: false
65
+ requirement: &id004 !ruby/object:Gem::Requirement
66
+ none: false
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ hash: 3
71
+ segments:
72
+ - 0
73
+ version: "0"
74
+ type: :development
75
+ version_requirements: *id004
76
+ description: Fast AR/PostgreSQL csv export. Used pg function 'copy to csv'. Effective on millions rows.
77
+ email:
78
+ - kostya27@gmail.com
79
+ executables: []
80
+
81
+ extensions: []
82
+
83
+ extra_rdoc_files: []
84
+
85
+ files:
86
+ - .gitignore
87
+ - Gemfile
88
+ - Gemfile.lock
89
+ - MIT-LICENSE
90
+ - README.md
91
+ - Rakefile
92
+ - lib/pg_csv.rb
93
+ - lib/pg_csv_version.rb
94
+ - pg_csv.gemspec
95
+ - spec/pg_csv_spec.rb
96
+ - spec/spec_helper.rb
97
+ - spec/spec_support.rb
98
+ - spec/tmp/.gitkeep
99
+ has_rdoc: true
100
+ homepage: http://github.com/kostya/pg_csv
101
+ licenses: []
102
+
103
+ post_install_message:
104
+ rdoc_options: []
105
+
106
+ require_paths:
107
+ - lib
108
+ required_ruby_version: !ruby/object:Gem::Requirement
109
+ none: false
110
+ requirements:
111
+ - - ">="
112
+ - !ruby/object:Gem::Version
113
+ hash: 3
114
+ segments:
115
+ - 0
116
+ version: "0"
117
+ required_rubygems_version: !ruby/object:Gem::Requirement
118
+ none: false
119
+ requirements:
120
+ - - ">="
121
+ - !ruby/object:Gem::Version
122
+ hash: 3
123
+ segments:
124
+ - 0
125
+ version: "0"
126
+ requirements: []
127
+
128
+ rubyforge_project:
129
+ rubygems_version: 1.3.7
130
+ signing_key:
131
+ specification_version: 3
132
+ summary: Fast AR/PostgreSQL csv export. Used pg function 'copy to csv'. Effective on millions rows.
133
+ test_files: []
134
+