pg_csv 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile.lock +1 -1
- data/README.md +7 -1
- data/benchmark/bench.rb +48 -0
- data/lib/pg_csv.rb +19 -34
- data/lib/pg_csv_version.rb +1 -1
- data/spec/pg_csv_spec.rb +11 -3
- metadata +85 -75
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -31,6 +31,7 @@ Options:
|
|
31
31
|
=> :gzip - save file to gzip
|
32
32
|
=> :stream - save to stream
|
33
33
|
=> :file - just save to file = default
|
34
|
+
=> :yield - return each row to block
|
34
35
|
```
|
35
36
|
|
36
37
|
Examples:
|
@@ -38,7 +39,7 @@ Examples:
|
|
38
39
|
PgCsv.new(:sql => User.good.to_sql).export('a1.csv')
|
39
40
|
PgCsv.new(:sql => sql).export('a2.gz', :type => :gzip)
|
40
41
|
PgCsv.new(:sql => sql).export('a3.csv', :temp_file => true)
|
41
|
-
PgCsv.new(:sql => sql
|
42
|
+
PgCsv.new(:sql => sql, :type => :plain).export
|
42
43
|
File.open("a4.csv", 'a'){|f| PgCsv.new(:sql => "select * from users").\
|
43
44
|
export(f, :type => :stream) }
|
44
45
|
PgCsv.new(:sql => sql).export('a5.csv', :delimiter => "\t")
|
@@ -54,4 +55,9 @@ Zlib::GzipWriter.open('some.gz') do |stream|
|
|
54
55
|
e.export(stream, :connection => connection)
|
55
56
|
end
|
56
57
|
end
|
58
|
+
|
59
|
+
# yield example
|
60
|
+
PgCsv.new(:sql => sql, :type => :yield).export do |row|
|
61
|
+
puts row
|
62
|
+
end
|
57
63
|
```
|
data/benchmark/bench.rb
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require "bundler"
|
3
|
+
Bundler.setup
|
4
|
+
|
5
|
+
$:.unshift(File.dirname(__FILE__) + '/../lib')
|
6
|
+
require 'pg_csv'
|
7
|
+
|
8
|
+
require 'benchmark'
|
9
|
+
require 'fileutils'
|
10
|
+
|
11
|
+
class PgCsv
|
12
|
+
def sql
|
13
|
+
""
|
14
|
+
end
|
15
|
+
|
16
|
+
def connection
|
17
|
+
1
|
18
|
+
end
|
19
|
+
|
20
|
+
def load_data
|
21
|
+
n = o(:times).to_i
|
22
|
+
c = 0
|
23
|
+
n.times do
|
24
|
+
c += 1
|
25
|
+
yield(@block["#{c},#{c*2},#{c * 249},#{rand(100)},#{rand(n)},blablabla,hahah,ahah,ahaha,ahahah,ah,1.55234143\n"])
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
filename = "./blah.test.file"
|
31
|
+
N = 500_000
|
32
|
+
|
33
|
+
tm = Benchmark.realtime{ PgCsv.new(:times => N, :type => :file).export(filename) }
|
34
|
+
puts "export file #{tm}"
|
35
|
+
|
36
|
+
tm = Benchmark.realtime{ PgCsv.new(:times => N, :type => :gzip).export(filename) }
|
37
|
+
puts "export gzip #{tm}"
|
38
|
+
|
39
|
+
tm = Benchmark.realtime{ PgCsv.new(:times => N, :type => :plain).export }
|
40
|
+
puts "export plain #{tm}"
|
41
|
+
|
42
|
+
tm = Benchmark.realtime{ File.open(filename, 'w'){|f| PgCsv.new(:times => N, :type => :stream).export(f) } }
|
43
|
+
puts "export stream #{tm}"
|
44
|
+
|
45
|
+
tm = Benchmark.realtime{ PgCsv.new(:times => N, :type => :yield).export{|row| row } }
|
46
|
+
puts "export yield #{tm}"
|
47
|
+
|
48
|
+
FileUtils.rm(filename) rescue nil
|
data/lib/pg_csv.rb
CHANGED
@@ -2,28 +2,13 @@ require 'active_record'
|
|
2
2
|
|
3
3
|
class PgCsv
|
4
4
|
|
5
|
-
# opts:
|
6
|
-
# :sql => "select u.*, p.* from users u, projects p where p.user_id = u.id order by email limit 100"
|
7
|
-
# :connection => AR.connection
|
8
|
-
# :delimiter => ["\t", ",", ]
|
9
|
-
# :header => boolean, use pg header for fields?
|
10
|
-
# :logger => logger
|
11
|
-
# :columns => manual array of column names, ignore :header option
|
12
|
-
|
13
|
-
# :temp_file => boolean, generate through a temp file; the final file appears via mv
|
14
|
-
# :temp_dir => path, ex: /tmp
|
15
|
-
|
16
|
-
# :type => :plain - return full string
|
17
|
-
# => :gzip - save file to gzip
|
18
|
-
# => :stream - save to stream
|
19
|
-
# => :file - just save to file * default
|
20
|
-
|
21
5
|
def initialize(opts = {})
|
22
6
|
@options = opts.symbolize_keys
|
23
7
|
end
|
24
8
|
|
25
9
|
# do export :to - filename or stream
|
26
|
-
def export(to, opts = {})
|
10
|
+
def export(to = nil, opts = {}, &block)
|
11
|
+
@block = block || Proc.new{|x|x}
|
27
12
|
@local_options = opts.symbolize_keys
|
28
13
|
|
29
14
|
raise ":connection should be" unless connection
|
@@ -37,7 +22,7 @@ class PgCsv
|
|
37
22
|
protected
|
38
23
|
|
39
24
|
def with_temp_file(to, use_temp_file, tmp_dir)
|
40
|
-
if use_temp_file
|
25
|
+
if use_temp_file && [:file, :gzip].include?(type)
|
41
26
|
check_str(to)
|
42
27
|
|
43
28
|
require 'fileutils'
|
@@ -70,6 +55,7 @@ protected
|
|
70
55
|
Zlib::GzipWriter.open(to, &exporter)
|
71
56
|
|
72
57
|
when :stream
|
58
|
+
raise "'to' should be" unless to
|
73
59
|
exporter[to]
|
74
60
|
|
75
61
|
when :plain
|
@@ -78,6 +64,10 @@ protected
|
|
78
64
|
exporter[sio]
|
79
65
|
result = sio.string
|
80
66
|
|
67
|
+
when :yield
|
68
|
+
# nothing is saved anywhere; each record is just yielded to the block
|
69
|
+
raise "block should be" unless @block
|
70
|
+
result = load_data{|_|}
|
81
71
|
end
|
82
72
|
|
83
73
|
info "<=== finished write #{to} in #{Time.now - start}"
|
@@ -94,22 +84,18 @@ protected
|
|
94
84
|
end
|
95
85
|
|
96
86
|
def export_to_stream(stream)
|
97
|
-
write_csv(stream)
|
87
|
+
count = write_csv(stream)
|
98
88
|
stream.flush if stream.respond_to?(:flush)
|
89
|
+
|
90
|
+
info "<= done exporting (#{count}) records."
|
99
91
|
end
|
100
92
|
|
101
93
|
def write_csv(stream)
|
102
|
-
count = 0
|
103
|
-
|
104
94
|
load_data do |row|
|
105
|
-
|
106
|
-
stream.write prepare_row(row)
|
95
|
+
stream.write(row)
|
107
96
|
end
|
108
|
-
|
109
|
-
info "<= done exporting (#{count}) records."
|
110
|
-
count
|
111
97
|
end
|
112
|
-
|
98
|
+
|
113
99
|
def load_data
|
114
100
|
info "#{query}"
|
115
101
|
raw = connection.raw_connection
|
@@ -119,14 +105,17 @@ protected
|
|
119
105
|
info "<= query"
|
120
106
|
|
121
107
|
info "=> write data"
|
122
|
-
yield(columns_str) if columns_str
|
123
|
-
|
108
|
+
yield(@block[columns_str]) if columns_str
|
109
|
+
|
110
|
+
count = 0
|
124
111
|
while row = raw.get_copy_data()
|
125
|
-
yield
|
112
|
+
yield(@block[row])
|
113
|
+
count += 1
|
126
114
|
end
|
127
115
|
info "<= write data"
|
128
116
|
|
129
117
|
q.clear
|
118
|
+
count
|
130
119
|
end
|
131
120
|
|
132
121
|
def query
|
@@ -140,10 +129,6 @@ DELIMITER '#{delimiter}'
|
|
140
129
|
SQL
|
141
130
|
end
|
142
131
|
|
143
|
-
def prepare_row(row)
|
144
|
-
row
|
145
|
-
end
|
146
|
-
|
147
132
|
def info(message)
|
148
133
|
logger.info(message) if logger
|
149
134
|
end
|
data/lib/pg_csv_version.rb
CHANGED
data/spec/pg_csv_spec.rb
CHANGED
@@ -114,6 +114,15 @@ describe PgCsv do
|
|
114
114
|
PgCsv.new(:sql => @sql, :type => :file).export(@name)
|
115
115
|
with_file(@name){|d| d.should == "4,5,6\n1,2,3\n" }
|
116
116
|
end
|
117
|
+
|
118
|
+
it "yield export" do
|
119
|
+
rows = []
|
120
|
+
PgCsv.new(:sql => @sql, :type => :yield).export do |row|
|
121
|
+
rows << row
|
122
|
+
end.should == 2
|
123
|
+
|
124
|
+
rows.should == ["4,5,6\n", "1,2,3\n"]
|
125
|
+
end
|
117
126
|
end
|
118
127
|
|
119
128
|
describe "integration specs" do
|
@@ -138,11 +147,10 @@ describe PgCsv do
|
|
138
147
|
it "custom prepare row" do
|
139
148
|
e = PgCsv.new(:sql => @sql)
|
140
149
|
|
141
|
-
|
150
|
+
e.export(@name) do |row|
|
142
151
|
row.split(",").join("-|-")
|
143
152
|
end
|
144
|
-
|
145
|
-
e.export(@name)
|
153
|
+
|
146
154
|
with_file(@name){|d| d.should == "4-|-5-|-6\n1-|-2-|-3\n" }
|
147
155
|
end
|
148
156
|
|
metadata
CHANGED
@@ -1,94 +1,95 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: pg_csv
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 29
|
5
5
|
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
- 3
|
10
|
+
version: 0.1.3
|
6
11
|
platform: ruby
|
7
|
-
authors:
|
12
|
+
authors:
|
8
13
|
- Makarchev Konstantin
|
9
14
|
autorequire:
|
10
15
|
bindir: bin
|
11
16
|
cert_chain: []
|
12
|
-
|
13
|
-
|
14
|
-
|
17
|
+
|
18
|
+
date: 2012-06-13 00:00:00 Z
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
15
21
|
name: pg
|
16
|
-
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
|
-
requirements:
|
19
|
-
- - ! '>='
|
20
|
-
- !ruby/object:Gem::Version
|
21
|
-
version: '0'
|
22
|
-
type: :runtime
|
23
22
|
prerelease: false
|
24
|
-
|
25
|
-
none: false
|
26
|
-
requirements:
|
27
|
-
- - ! '>='
|
28
|
-
- !ruby/object:Gem::Version
|
29
|
-
version: '0'
|
30
|
-
- !ruby/object:Gem::Dependency
|
31
|
-
name: activerecord
|
32
|
-
requirement: !ruby/object:Gem::Requirement
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
33
24
|
none: false
|
34
|
-
requirements:
|
35
|
-
- -
|
36
|
-
- !ruby/object:Gem::Version
|
37
|
-
|
25
|
+
requirements:
|
26
|
+
- - ">="
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
hash: 3
|
29
|
+
segments:
|
30
|
+
- 0
|
31
|
+
version: "0"
|
38
32
|
type: :runtime
|
33
|
+
version_requirements: *id001
|
34
|
+
- !ruby/object:Gem::Dependency
|
35
|
+
name: activerecord
|
39
36
|
prerelease: false
|
40
|
-
|
37
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
41
38
|
none: false
|
42
|
-
requirements:
|
43
|
-
- -
|
44
|
-
- !ruby/object:Gem::Version
|
45
|
-
|
46
|
-
|
39
|
+
requirements:
|
40
|
+
- - ">="
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
hash: 3
|
43
|
+
segments:
|
44
|
+
- 0
|
45
|
+
version: "0"
|
46
|
+
type: :runtime
|
47
|
+
version_requirements: *id002
|
48
|
+
- !ruby/object:Gem::Dependency
|
47
49
|
name: rspec
|
48
|
-
requirement: !ruby/object:Gem::Requirement
|
49
|
-
none: false
|
50
|
-
requirements:
|
51
|
-
- - ! '>='
|
52
|
-
- !ruby/object:Gem::Version
|
53
|
-
version: '0'
|
54
|
-
type: :development
|
55
50
|
prerelease: false
|
56
|
-
|
57
|
-
none: false
|
58
|
-
requirements:
|
59
|
-
- - ! '>='
|
60
|
-
- !ruby/object:Gem::Version
|
61
|
-
version: '0'
|
62
|
-
- !ruby/object:Gem::Dependency
|
63
|
-
name: rake
|
64
|
-
requirement: !ruby/object:Gem::Requirement
|
51
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
65
52
|
none: false
|
66
|
-
requirements:
|
67
|
-
- -
|
68
|
-
- !ruby/object:Gem::Version
|
69
|
-
|
53
|
+
requirements:
|
54
|
+
- - ">="
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
hash: 3
|
57
|
+
segments:
|
58
|
+
- 0
|
59
|
+
version: "0"
|
70
60
|
type: :development
|
61
|
+
version_requirements: *id003
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: rake
|
71
64
|
prerelease: false
|
72
|
-
|
65
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
73
66
|
none: false
|
74
|
-
requirements:
|
75
|
-
- -
|
76
|
-
- !ruby/object:Gem::Version
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
67
|
+
requirements:
|
68
|
+
- - ">="
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
hash: 3
|
71
|
+
segments:
|
72
|
+
- 0
|
73
|
+
version: "0"
|
74
|
+
type: :development
|
75
|
+
version_requirements: *id004
|
76
|
+
description: Fast AR/PostgreSQL csv export. Used pg function 'copy to csv'. Effective on millions rows.
|
77
|
+
email:
|
81
78
|
- kostya27@gmail.com
|
82
79
|
executables: []
|
80
|
+
|
83
81
|
extensions: []
|
82
|
+
|
84
83
|
extra_rdoc_files: []
|
85
|
-
|
84
|
+
|
85
|
+
files:
|
86
86
|
- .gitignore
|
87
87
|
- Gemfile
|
88
88
|
- Gemfile.lock
|
89
89
|
- MIT-LICENSE
|
90
90
|
- README.md
|
91
91
|
- Rakefile
|
92
|
+
- benchmark/bench.rb
|
92
93
|
- lib/pg_csv.rb
|
93
94
|
- lib/pg_csv_version.rb
|
94
95
|
- pg_csv.gemspec
|
@@ -98,27 +99,36 @@ files:
|
|
98
99
|
- spec/tmp/.gitkeep
|
99
100
|
homepage: http://github.com/kostya/pg_csv
|
100
101
|
licenses: []
|
102
|
+
|
101
103
|
post_install_message:
|
102
104
|
rdoc_options: []
|
103
|
-
|
105
|
+
|
106
|
+
require_paths:
|
104
107
|
- lib
|
105
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
108
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
106
109
|
none: false
|
107
|
-
requirements:
|
108
|
-
- -
|
109
|
-
- !ruby/object:Gem::Version
|
110
|
-
|
111
|
-
|
110
|
+
requirements:
|
111
|
+
- - ">="
|
112
|
+
- !ruby/object:Gem::Version
|
113
|
+
hash: 3
|
114
|
+
segments:
|
115
|
+
- 0
|
116
|
+
version: "0"
|
117
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
112
118
|
none: false
|
113
|
-
requirements:
|
114
|
-
- -
|
115
|
-
- !ruby/object:Gem::Version
|
116
|
-
|
119
|
+
requirements:
|
120
|
+
- - ">="
|
121
|
+
- !ruby/object:Gem::Version
|
122
|
+
hash: 3
|
123
|
+
segments:
|
124
|
+
- 0
|
125
|
+
version: "0"
|
117
126
|
requirements: []
|
127
|
+
|
118
128
|
rubyforge_project:
|
119
129
|
rubygems_version: 1.8.24
|
120
130
|
signing_key:
|
121
131
|
specification_version: 3
|
122
|
-
summary: Fast AR/PostgreSQL csv export. Used pg function 'copy to csv'. Effective
|
123
|
-
on millions rows.
|
132
|
+
summary: Fast AR/PostgreSQL csv export. Used pg function 'copy to csv'. Effective on millions rows.
|
124
133
|
test_files: []
|
134
|
+
|