data_resurrection 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2011 Rodrigo Manhães
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,37 @@
1
+ = data_resurrection
2
+
3
+ Bring your data, buried in decrepit formats, back to life! Convert data from old formats to modern ones. Currently supports DBF.
4
+
5
+ == How to use
6
+
7
+ If you have a PostgreSQL database and a DBF file called 'decrepit.dbf', you must call:
8
+
9
+ r = DataResurrection::Resuscitator.new(:dbf, :postgresql => :settings)
10
+ r.resurrect('decrepit.dbf', :target => 'new_beautiful_table',
11
+ :from => 'WINDOWS-1252', :to => 'UTF-8')
12
+
13
+ and you will have a table called 'new_beautiful_table' in your PostgreSQL database.
14
+
15
+ The hash passed to the constructor should contain your ActiveRecord database settings (i.e., the definitions found in config/database.yml in a Rails application).
16
+
17
+ The method "resurrect" accepts ":target", ":from" and ":to" as options. :target is the name of the table to be created in the target database, and :from and :to (both optional) force an encoding conversion for all fields. The :from parameter may be a list when the same table mixes multiple encodings (yes, this kind of freaky thing really exists).
18
+
19
+ In some cases, field types in the original table are not compatible with the ones in the target table. For example, in a real case, a DBF table had an integer field containing a 12-digit value, which caused an overflow when trying to copy the value to an integer field in a PostgreSQL table. This can be handled with an additional option to the method "resurrect":
20
+
21
+ @data_resurrection.resurrect(@dbf_file_path, :target => 'nationality',
22
+ :from => ['WINDOWS-1252', 'CP850'], :to => 'UTF-8',
23
+ :field_types => {:nr => :string })
24
+
25
+ The option :field_types is a hash in which each key is the field name and the value is the field type in the target table.
26
+
27
+ If a field name matches an SQL reserved word or a core Ruby method such as "class", an underscore is appended to the field name in the new table.
28
+
29
+
30
+ == But gem dbf does this work!
31
+
32
+ Data resurrection heavily uses this amazing gem internally, but currently it doesn't support encoding conversion or field type conversion, and it does not handle fields whose names match SQL reserved words or Ruby methods.
33
+
34
+ == Risk disclaimer
35
+
36
+ This project was built for my own use, and it works for me. Remember that database migrations are a very critical issue. Anything you do with this software is at your own risk.
37
+
@@ -0,0 +1,113 @@
1
+ # coding: utf-8
2
+
3
+ require 'dbf'
4
+ require 'iconv'
5
+
6
+ module DataResurrection
7
+ module Adapters
8
+ module DBF
9
# Imports a DBF file into a table of the connected database.
#
# origin_table - path of the DBF file, handed to ::DBF::Table.new.
# options      - Hash read for the keys :target (name of the table to
#                write to), :from and :to (encoding conversion) and
#                :field_types (per-field type overrides).
#
# Returns the result of copy_data.
def resurrect(origin_table, options)
  target_table_name, from, to = options.values_at(:target, :from, :to)
  source = ::DBF::Table.new(origin_table)
  rows = get_data(source, {from: from, to: to}, reserved_words)
  create_table(source, target_table_name, rows, options[:field_types])
  copy_data(target_table_name, rows)
end
17
+
18
# Reads every record of +table+ as a Hash and, when +encodings+ is given,
# passes the rows through handle_encodings.
#
# table          - source table, forwarded to get_raw_data.
# encodings      - Hash forwarded to handle_encodings, or nil to skip it.
# reserved_words - list forwarded to get_raw_data.
#
# Returns the rows.
def get_data(table, encodings=nil, reserved_words=[])
  rows = get_raw_data(table, reserved_words)
  encodings ? handle_encodings(rows, encodings) : rows
end
23
+
24
+ private
25
+
26
# Creates the target table by evaluating the schema source returned by
# +table.schema+.
#
# table       - object whose #schema returns the schema source as a String.
# table_name  - name the created table must have; when nil, the name found
#               in the generated schema is kept.
# data        - rows used by mark_name_clashed_fields to find renamed fields.
# field_types - Hash of field name => new type passed to replace_types,
#               or nil for no overrides.
#
# Returns whatever evaluating the schema returns.
def create_table(table, table_name, data, field_types)
  schema = table.schema
  # copy_data inserts into +table_name+, so the table has to be created under
  # that name rather than whatever name the generated schema carries.
  # NOTE(review): assumes the schema declares the table as
  # `create_table "<name>"` -- confirm against the dbf gem version in use.
  # When the pattern is absent the schema is left untouched.
  if table_name
    schema = schema.sub(/create_table "[^"]*"/) { 'create_table "%s"' % table_name }
  end
  schema = mark_name_clashed_fields(schema, data)
  schema = replace_types(schema, field_types) if field_types
  # NOTE(review): eval executes text assembled from the source table's
  # schema; only run this against files you trust.
  eval(schema)
end
31
+
32
# Inserts every row of +data+ into the table called +table_name+.
#
# ARObject is pointed at the table and its cached column information is
# reset before the rows are written with create!.
#
# Returns +data+.
def copy_data(table_name, data)
  ARObject.table_name = table_name
  ARObject.reset_column_information
  data.each do |row|
    ARObject.create!(row)
  end
end
39
+
40
# Turns each record of +table+ into a Hash of field name => value.
#
# Field names are the lower-cased column names passed through
# generated_field_name. Nil records and records that produce an empty
# Hash are left out of the result.
#
# Returns an Array of Hashes.
def get_raw_data(table, reserved_words)
  rows = table.map do |record|
    row = {}
    if record
      table.columns.each do |column|
        name = column.name.downcase
        row[generated_field_name(name, reserved_words)] = record.attributes[name]
      end
    end
    row
  end
  rows.reject { |row| row.empty? }
end
49
+
50
# Converts every String value of every row to the target encoding, in place.
#
# data      - Array of Hashes (field name => value); non-String values are
#             left untouched.
# encodings - Hash with :from (one source encoding name, a list of them, or
#             nil) and :to (target encoding name).
#
# For each value the source encodings are tried in order. The first
# conversion whose result passes all_valid? wins; when none passes, the
# last successful conversion is kept.
#
# Returns +data+.
# Raises Iconv::IllegalSequence when the last source encoding cannot decode
# a value.
def handle_encodings(data, encodings)
  # Array() accepts nil (no :from given), a single name or a list, so an
  # omitted :from no longer fails on nil.
  sources = Array(encodings[:from])
  target = encodings[:to]
  # Nothing to convert from: leave the rows as they are.
  return data if sources.empty?

  data.each do |record|
    record.each do |field, original|
      next unless original.kind_of?(String)
      converted = original
      sources.each do |encoding|
        begin
          converted = Iconv.new(target, encoding).iconv(original)
          break if all_valid?(converted)
        rescue Iconv::IllegalSequence
          # Earlier encodings may legitimately fail; only the last one is fatal.
          raise if encoding == sources.last
        end
      end
      record[field] = converted
    end
  end
end
72
+
73
# Renames, inside the schema source, the columns whose field names were
# changed in the data rows (a trailing character was appended to them).
#
# schema - schema source as a String; it is modified in place.
# data   - Array of row Hashes; only the keys of the first row are used.
#
# A field whose `column "<name>"` declaration is missing from the schema is
# taken to be a renamed one: the declaration of the name without its last
# character is rewritten to the full field name.
#
# Returns +schema+.
# Raises IndexError when neither the field nor its shortened name is
# declared in the schema.
def mark_name_clashed_fields(schema, data)
  first_row = data.first
  # A table without rows gives no field names to compare; keep the schema.
  return schema unless first_row

  first_row.each_key do |field|
    next if schema.include?('column "%s"' % field)
    schema['column "%s"' % field.chop] = 'column "%s"' % field
  end
  schema
end
81
+
82
# Overrides column types inside the schema source.
#
# schema      - schema source as a String; it is modified in place.
# field_types - Hash of field name => new type (e.g. {:nr => :string}).
#
# Only the type symbol that follows `column "<field>", :` is replaced; any
# options after it (such as `, :limit => 20`) are kept as they are.
#
# Returns +schema+.
# Raises ArgumentError when a field has no column declaration in the schema.
def replace_types(schema, field_types)
  field_types.each do |field, new_type|
    # Escape the name so characters special to regular expressions match literally.
    declaration = /(column "#{Regexp.escape(field.to_s)}", :)\w+/
    replaced = schema.sub!(declaration) { "#{Regexp.last_match(1)}#{new_type}" }
    unless replaced
      raise ArgumentError, "cannot replace type: no column #{field.to_s.inspect} in schema"
    end
  end
  schema
end
93
+
94
# Tells whether every character of +string+ appears in VALID_CHARS.
# An empty string is valid.
def all_valid?(string)
  string.each_char.all? { |char| VALID_CHARS.include?(char) }
end
97
+
98
# Returns the name to use for a field in the target table: +field_name+
# followed by an underscore when its upper-cased form is listed in
# +reserved_words+, otherwise +field_name+ itself.
def generated_field_name(field_name, reserved_words)
  return field_name unless reserved_words.include?(field_name.upcase)
  "#{field_name}_"
end
101
+
102
# ActiveRecord model used to write the imported rows. It is not tied to a
# fixed table: copy_data assigns its table_name and resets its column
# information before inserting.
class ARObject < ActiveRecord::Base
end
104
+
105
# Character classes that make up VALID_CHARS, the set all_valid? checks
# strings against.
REGULAR_LETTERS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'.freeze
DIGITS = '1234567890'.freeze
ACCENTED_LETTERS = 'ÀàÁÉÍÓÚáéíìóúÂÊÔâêôÃÑÕãñõÖÜöüÇç'.freeze
# Single-quoted on purpose: inside a double-quoted literal the sequence
# #$\ is read as an interpolation of the global variable $\, which dropped
# "#" and "$" from the list.
SYMBOLS = '"\'!@#$%&*()-_+=`{}[]^~,<>.:;/?|\\ '.freeze
VALID_CHARS = [REGULAR_LETTERS, DIGITS, ACCENTED_LETTERS, SYMBOLS].join.freeze
110
+ end
111
+ end
112
+ end
113
+
@@ -0,0 +1,338 @@
1
+ ABSOLUTE
2
+ ACTION
3
+ ADD
4
+ AFTER
5
+ ALL
6
+ ALLOCATE
7
+ ALTER
8
+ AND
9
+ ANY
10
+ ARE
11
+ ARRAY
12
+ AS
13
+ ASC
14
+ ASENSITIVE
15
+ ASSERTION
16
+ ASYMMETRIC
17
+ AT
18
+ ATOMIC
19
+ AUTHORIZATION
20
+ AVG
21
+ BEFORE
22
+ BEGIN
23
+ BETWEEN
24
+ BIGINT
25
+ BINARY
26
+ BIT
27
+ BIT_LENGTH
28
+ BLOB
29
+ BOOLEAN
30
+ BOTH
31
+ BREADTH
32
+ BY
33
+ CALL
34
+ CALLED
35
+ CASCADE
36
+ CASCADED
37
+ CASE
38
+ CAST
39
+ CATALOG
40
+ CHAR
41
+ CHAR_LENGTH
42
+ CHARACTER
43
+ CHARACTER_LENGTH
44
+ CHECK
45
+ CLASS
46
+ CLOB
47
+ CLOSE
48
+ COALESCE
49
+ COLLATE
50
+ COLLATION
51
+ COLUMN
52
+ COMMIT
53
+ CONDITION
54
+ CONNECT
55
+ CONNECTION
56
+ CONSTRAINT
57
+ CONSTRAINTS
58
+ CONSTRUCTOR
59
+ CONTAINS
60
+ CONTINUE
61
+ CONVERT
62
+ CORRESPONDING
63
+ COUNT
64
+ CREATE
65
+ CROSS
66
+ CUBE
67
+ CURRENT
68
+ CURRENT_DATE
69
+ CURRENT_DEFAULT_TRANSFORM_GROUP
70
+ CURRENT_PATH
71
+ CURRENT_ROLE
72
+ CURRENT_TIME
73
+ CURRENT_TIMESTAMP
74
+ CURRENT_TRANSFORM_GROUP_FOR_TYPE
75
+ CURRENT_USER
76
+ CURSOR
77
+ CYCLE
78
+ DATA
79
+ DATE
80
+ DAY
81
+ DEALLOCATE
82
+ DEC
83
+ DECIMAL
84
+ DECLARE
85
+ DEFAULT
86
+ DEFERRABLE
87
+ DEFERRED
88
+ DELETE
89
+ DEPTH
90
+ DEREF
91
+ DESC
92
+ DESCRIBE
93
+ DESCRIPTOR
94
+ DETERMINISTIC
95
+ DIAGNOSTICS
96
+ DISCONNECT
97
+ DISTINCT
98
+ DO
99
+ DOMAIN
100
+ DOUBLE
101
+ DROP
102
+ DYNAMIC
103
+ EACH
104
+ ELEMENT
105
+ ELSE
106
+ ELSEIF
107
+ END
108
+ EQUALS
109
+ ESCAPE
110
+ EXCEPT
111
+ EXCEPTION
112
+ EXEC
113
+ EXECUTE
114
+ EXISTS
115
+ EXIT
116
+ EXTERNAL
117
+ EXTRACT
118
+ FALSE
119
+ FETCH
120
+ FILTER
121
+ FIRST
122
+ FLOAT
123
+ FOR
124
+ FOREIGN
125
+ FOUND
126
+ FREE
127
+ FROM
128
+ FULL
129
+ FUNCTION
130
+ GENERAL
131
+ GET
132
+ GLOBAL
133
+ GO
134
+ GOTO
135
+ GRANT
136
+ GROUP
137
+ GROUPING
138
+ HANDLER
139
+ HAVING
140
+ HOLD
141
+ HOUR
142
+ IDENTITY
143
+ IF
144
+ IMMEDIATE
145
+ IN
146
+ INDICATOR
147
+ INITIALLY
148
+ INNER
149
+ INOUT
150
+ INPUT
151
+ INSENSITIVE
152
+ INSERT
153
+ INT
154
+ INTEGER
155
+ INTERSECT
156
+ INTERVAL
157
+ INTO
158
+ IS
159
+ ISOLATION
160
+ ITERATE
161
+ JOIN
162
+ KEY
163
+ LANGUAGE
164
+ LARGE
165
+ LAST
166
+ LATERAL
167
+ LEADING
168
+ LEAVE
169
+ LEFT
170
+ LEVEL
171
+ LIKE
172
+ LOCAL
173
+ LOCALTIME
174
+ LOCALTIMESTAMP
175
+ LOCATOR
176
+ LOOP
177
+ LOWER
178
+ MAP
179
+ MATCH
180
+ MAX
181
+ MEMBER
182
+ MERGE
183
+ METHOD
184
+ MIN
185
+ MINUTE
186
+ MODIFIES
187
+ MODULE
188
+ MONTH
189
+ MULTISET
190
+ NAMES
191
+ NATIONAL
192
+ NATURAL
193
+ NCHAR
194
+ NCLOB
195
+ NEW
196
+ NEXT
197
+ NO
198
+ NONE
199
+ NOT
200
+ NULL
201
+ NULLIF
202
+ NUMERIC
203
+ OBJECT
204
+ OCTET_LENGTH
205
+ OF
206
+ OLD
207
+ ON
208
+ ONLY
209
+ OPEN
210
+ OPTION
211
+ OR
212
+ ORDER
213
+ ORDINALITY
214
+ OUT
215
+ OUTER
216
+ OUTPUT
217
+ OVER
218
+ OVERLAPS
219
+ PAD
220
+ PARAMETER
221
+ PARTIAL
222
+ PARTITION
223
+ PATH
224
+ POSITION
225
+ PRECISION
226
+ PREPARE
227
+ PRESERVE
228
+ PRIMARY
229
+ PRIOR
230
+ PRIVILEGES
231
+ PROCEDURE
232
+ PUBLIC
233
+ RANGE
234
+ READ
235
+ READS
236
+ REAL
237
+ RECURSIVE
238
+ REF
239
+ REFERENCES
240
+ REFERENCING
241
+ RELATIVE
242
+ RELEASE
243
+ REPEAT
244
+ RESIGNAL
245
+ RESTRICT
246
+ RESULT
247
+ RETURN
248
+ RETURNS
249
+ REVOKE
250
+ RIGHT
251
+ ROLE
252
+ ROLLBACK
253
+ ROLLUP
254
+ ROUTINE
255
+ ROW
256
+ ROWS
257
+ SAVEPOINT
258
+ SCHEMA
259
+ SCOPE
260
+ SCROLL
261
+ SEARCH
262
+ SECOND
263
+ SECTION
264
+ SELECT
265
+ SENSITIVE
266
+ SESSION
267
+ SESSION_USER
268
+ SET
269
+ SETS
270
+ SIGNAL
271
+ SIMILAR
272
+ SIZE
273
+ SMALLINT
274
+ SOME
275
+ SPACE
276
+ SPECIFIC
277
+ SPECIFICTYPE
278
+ SQL
279
+ SQLCODE
280
+ SQLERROR
281
+ SQLEXCEPTION
282
+ SQLSTATE
283
+ SQLWARNING
284
+ START
285
+ STATE
286
+ STATIC
287
+ SUBMULTISET
288
+ SUBSTRING
289
+ SUM
290
+ SYMMETRIC
291
+ SYSTEM
292
+ SYSTEM_USER
293
+ TABLE
294
+ TABLESAMPLE
295
+ TEMPORARY
296
+ THEN
297
+ TIME
298
+ TIMESTAMP
299
+ TIMEZONE_HOUR
300
+ TIMEZONE_MINUTE
301
+ TO
302
+ TRAILING
303
+ TRANSACTION
304
+ TRANSLATE
305
+ TRANSLATION
306
+ TREAT
307
+ TRIGGER
308
+ TRIM
309
+ TRUE
310
+ UNDER
311
+ UNDO
312
+ UNION
313
+ UNIQUE
314
+ UNKNOWN
315
+ UNNEST
316
+ UNTIL
317
+ UPDATE
318
+ UPPER
319
+ USAGE
320
+ USER
321
+ USING
322
+ VALUE
323
+ VALUES
324
+ VARCHAR
325
+ VARYING
326
+ VIEW
327
+ WHEN
328
+ WHENEVER
329
+ WHERE
330
+ WHILE
331
+ WINDOW
332
+ WITH
333
+ WITHIN
334
+ WITHOUT
335
+ WORK
336
+ WRITE
337
+ YEAR
338
+ ZONE
@@ -0,0 +1,21 @@
1
module DataResurrection
  # Entry point of the library: an instance is extended with the adapter
  # module for the chosen source format and holds the ActiveRecord
  # connection to the target database.
  class Resuscitator
    # adapter                - Symbol naming the source format (see #adapters).
    # active_record_settings - connection settings passed unchanged to
    #                          ActiveRecord::Base.establish_connection.
    #
    # Raises ArgumentError when +adapter+ is not a known adapter.
    def initialize(adapter, active_record_settings)
      # Fail with a readable message instead of the TypeError that
      # `extend nil` would raise for an unknown adapter.
      adapter_module = adapters.fetch(adapter) do
        known = adapters.keys.map(&:inspect).join(', ')
        raise ArgumentError, "unknown adapter #{adapter.inspect} (supported: #{known})"
      end
      extend adapter_module
      ActiveRecord::Base.establish_connection(active_record_settings)
      @connection = ActiveRecord::Base.connection
    end

    private

    # Maps adapter names to the modules that implement them.
    def adapters
      @adapters ||= {:dbf => DataResurrection::Adapters::DBF }
    end

    # Lines of the 'reserved_words' file that sits next to this source file,
    # without their line terminators. Read once and cached.
    def reserved_words
      @reserved_words ||= begin
        path = File.expand_path('reserved_words', File.dirname(__FILE__))
        File.readlines(path).map(&:chomp)
      end
    end
  end
end
21
+
@@ -0,0 +1,3 @@
1
# Load every Ruby file in the data_resurrection directory next to this file.
# The pattern is made absolute so that `require` does not fall back to a
# $LOAD_PATH lookup when __FILE__ is relative, and the matches are sorted so
# the load order does not depend on the platform's directory order.
data_resurrection_folder = File.expand_path(File.join('data_resurrection', '*.rb'), File.dirname(__FILE__))
Dir.glob(data_resurrection_folder).sort.each { |f| require f }
3
+
metadata ADDED
@@ -0,0 +1,98 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: data_resurrection
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Rodrigo Manhães
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-08-27 00:00:00.000000000 -03:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: activerecord
17
+ requirement: &19880520 !ruby/object:Gem::Requirement
18
+ none: false
19
+ requirements:
20
+ - - ~>
21
+ - !ruby/object:Gem::Version
22
+ version: 3.0.0
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: *19880520
26
+ - !ruby/object:Gem::Dependency
27
+ name: dbf
28
+ requirement: &19880060 !ruby/object:Gem::Requirement
29
+ none: false
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: 1.5.0
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: *19880060
37
+ - !ruby/object:Gem::Dependency
38
+ name: sqlite3
39
+ requirement: &19879600 !ruby/object:Gem::Requirement
40
+ none: false
41
+ requirements:
42
+ - - ~>
43
+ - !ruby/object:Gem::Version
44
+ version: 1.3.0
45
+ type: :development
46
+ prerelease: false
47
+ version_requirements: *19879600
48
+ - !ruby/object:Gem::Dependency
49
+ name: rspec
50
+ requirement: &19918680 !ruby/object:Gem::Requirement
51
+ none: false
52
+ requirements:
53
+ - - ~>
54
+ - !ruby/object:Gem::Version
55
+ version: 2.6.0
56
+ type: :development
57
+ prerelease: false
58
+ version_requirements: *19918680
59
+ description: Converts DBF to modern formats.
60
+ email: rmanhaes@gmail.com
61
+ executables: []
62
+ extensions: []
63
+ extra_rdoc_files: []
64
+ files:
65
+ - lib/data_resurrection.rb
66
+ - lib/data_resurrection/adapters.rb
67
+ - lib/data_resurrection/resurrector.rb
68
+ - README.rdoc
69
+ - LICENSE.txt
70
+ - lib/data_resurrection/reserved_words
71
+ has_rdoc: true
72
+ homepage: https://github.com/rodrigomanhaes/data_resurrection
73
+ licenses: []
74
+ post_install_message:
75
+ rdoc_options:
76
+ - --charset=UTF-8
77
+ require_paths:
78
+ - lib
79
+ required_ruby_version: !ruby/object:Gem::Requirement
80
+ none: false
81
+ requirements:
82
+ - - ! '>='
83
+ - !ruby/object:Gem::Version
84
+ version: '0'
85
+ required_rubygems_version: !ruby/object:Gem::Requirement
86
+ none: false
87
+ requirements:
88
+ - - ! '>='
89
+ - !ruby/object:Gem::Version
90
+ version: '0'
91
+ requirements: []
92
+ rubyforge_project:
93
+ rubygems_version: 1.6.2
94
+ signing_key:
95
+ specification_version: 3
96
+ summary: Bring your data, buried in decrepit formats, back to life! Convert data from
97
+ old formats to modern ones. Currently supports DBF.
98
+ test_files: []