jsuchal-activerecord-fast-import 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.markdown ADDED
@@ -0,0 +1,66 @@
1
+ # activerecord-fast-import
2
+
3
+ Loads data from text files into tables using the fast, native MySQL [LOAD DATA INFILE](http://dev.mysql.com/doc/refman/5.1/en/load-data.html) query.
4
+
5
+ ## Examples
6
+
7
+ ### Loading data from a tab-delimited log file
8
+
9
+ Suppose you have an ActiveRecord model `LogEntry` defined as `LogEntry happened_at:datetime url:string` and a log file with tab-delimited columns like this:
10
+
11
+ 2009-09-30 12:32:43<tab>http://github.com/
12
+ 2009-09-30 13:36:13<tab>http://facebook.com/
13
+
14
+ To import data from this log file, use:
15
+
16
+ LogEntry.fast_import('huge.log')
17
+
18
+ That's it!
19
+
20
+ Of course, in the real world you will also need more advanced features. Read on...
21
+
22
+
23
+ ### Changing delimiters and ignoring some rows
24
+
25
+ Of course, not all log files are delimited by tabs and newlines. Just put custom delimiters in the options hash. If you want to ignore the first 10 lines, use `:ignore_lines`. Then pass the hash as the second argument to `fast_import`:
26
+
27
+ import_options = {
28
+ :fields_terminated_by => ',',
29
+ :lines_terminated_by => ';',
30
+ :ignore_lines => 10
31
+ }
32
+
33
+ ### Changing order of columns and ignoring columns
34
+
35
+ Now, imagine you want to import data from a huge log file with the following format:
36
+
37
+ http://github.com/<tab>Mozilla<tab>2009-09-30 12:32:43
38
+ http://facebook.com/<tab>Opera<tab>2009-09-30 13:36:13
39
+
40
+ The columns are clearly in a different order, and we also want to ignore the second column. Let's do it:
41
+
42
+ import_options = {:columns => ["url", "@dummy", "happened_at"]}
43
+ LogEntry.fast_import('huge.log', import_options)
44
+
45
+ The special `@dummy` name loads that column into a MySQL user variable; if the variable is not used in a transformation, its value is simply ignored.
46
+
47
+ ### Transforming data
48
+
49
+ Now imagine we have a log file like this:
50
+
51
+ 2009-09-30 12:32:43<tab>http://github.com/<tab>image.jpg
52
+ 2009-09-30 13:36:13<tab>http://facebook.com/<tab>styles/default.css
53
+
54
+ We want to concatenate the last two columns (domain and file) into the single `url` column.
55
+
56
+ import_options = {
57
+ :columns => ["happened_at", "@domain", "@file"],
58
+ :mapping => { :url => "CONCAT(@domain, @file)" }
59
+ }
60
+ LogEntry.fast_import('huge.log', import_options)
61
+
62
+ Of course, you can use any of those shiny [MySQL functions](http://dev.mysql.com/doc/refman/5.1/en/functions.html).
63
+
64
+ ## Copyright
65
+
66
+ Copyright (c) 2009 Jan Suchal. See LICENSE for details.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.0
1
+ 0.1.1
@@ -5,7 +5,7 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{activerecord-fast-import}
8
- s.version = "0.1.0"
8
+ s.version = "0.1.1"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Jan Suchal"]
@@ -14,13 +14,13 @@ Gem::Specification.new do |s|
14
14
  s.email = %q{johno@jsmf.net}
15
15
  s.extra_rdoc_files = [
16
16
  "LICENSE",
17
- "README.rdoc"
17
+ "README.markdown"
18
18
  ]
19
19
  s.files = [
20
20
  ".document",
21
21
  ".gitignore",
22
22
  "LICENSE",
23
- "README.rdoc",
23
+ "README.markdown",
24
24
  "Rakefile",
25
25
  "VERSION",
26
26
  "activerecord-fast-import.gemspec",
@@ -1,53 +1,53 @@
1
- module ActiveRecord
2
- class Base
3
- # Deletes all rows in table very fast, but without calling +destroy+ method
4
- # nor any hooks.
5
- def self.truncate_table
6
- connection.execute("TRUNCATE TABLE #{quoted_table_name}")
7
- end
8
-
9
- # Disables key updates for model table
10
- def self.disable_keys
11
- connection.execute("ALTER TABLE #{quoted_table_name} DISABLE KEYS")
12
- end
13
-
14
- # Enables key updates for model table
15
- def self.enable_keys
16
- connection.execute("ALTER TABLE #{quoted_table_name} ENABLE KEYS")
17
- end
18
-
19
- # Loads data from file using MySQL native LOAD DATA INFILE query, disabling
20
- # key updates for even faster import speed
21
- #
22
- # ==== Parameters
23
- # * +file+ the file to load
24
- # * +options+ (see <tt>load_data_infile?</tt>)
25
- def self.fast_import(file, options = {})
26
- disable_keys
27
- load_data_infile(file, options)
28
- enable_keys
29
- end
30
-
31
- # Loads data from file using MySQL native LOAD DATA INFILE query
32
- #
33
- # ==== Parameters
34
- # * +file+ the file to import
35
- # * +options+
36
- def self.load_data_infile(file, options = {})
37
- sql = "LOAD DATA LOCAL INFILE '#{file}' INTO TABLE #{quoted_table_name} "
38
- sql << "FIELDS TERMINATED BY '#{options[:fields_terminated_by]}' " if options[:fields_terminated_by]
39
- sql << "LINES TERMINATED BY '#{options[:lines_terminated_by]}' " if options[:lines_terminated_by]
40
- sql << "IGNORE #{options[:ignore_lines]} LINES " if options[:ignore_lines]
41
- sql << "(" + options[:columns].join(', ') + ") " if options[:columns]
42
- if options[:mapping]
43
- mappings = []
44
- options[:mapping].each_pair do |column, mapping|
45
- mappings << "#{column} = #{mapping}"
46
- end
47
- sql << "SET #{mappings.join(', ')} " if mappings.size > 0
48
- end
49
- sql << ";"
50
- connection.execute(sql)
51
- end
52
- end
1
+ module ActiveRecord #:nodoc:
2
+ class Base
3
+ # Deletes all rows in table very fast, but without calling +destroy+ method
4
+ # nor any hooks.
5
+ def self.truncate_table
6
+ connection.execute("TRUNCATE TABLE #{quoted_table_name}")
7
+ end
8
+
9
+ # Disables key updates for model table
10
+ def self.disable_keys
11
+ connection.execute("ALTER TABLE #{quoted_table_name} DISABLE KEYS")
12
+ end
13
+
14
+ # Enables key updates for model table
15
+ def self.enable_keys
16
+ connection.execute("ALTER TABLE #{quoted_table_name} ENABLE KEYS")
17
+ end
18
+
19
+ # Loads data from file using MySQL native LOAD DATA INFILE query, disabling
20
+ # key updates for even faster import speed
21
+ #
22
+ # ==== Parameters
23
+ # * +file+ the file to load
24
+ # * +options+ (see <tt>load_data_infile</tt>)
25
+ def self.fast_import(file, options = {})
26
+ disable_keys
27
+ load_data_infile(file, options)
28
+ enable_keys
29
+ end
30
+
31
+ # Loads data from file using MySQL native LOAD DATA INFILE query
32
+ #
33
+ # ==== Parameters
34
+ # * +file+ the file to import
35
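+ # * +options+ optional hash; recognized keys are <tt>:fields_terminated_by</tt>, <tt>:lines_terminated_by</tt>, <tt>:ignore_lines</tt>, <tt>:columns</tt> and <tt>:mapping</tt>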
36
+ def self.load_data_infile(file, options = {})
37
+ sql = "LOAD DATA LOCAL INFILE '#{file}' INTO TABLE #{quoted_table_name} "
38
+ sql << "FIELDS TERMINATED BY '#{options[:fields_terminated_by]}' " if options[:fields_terminated_by]
39
+ sql << "LINES TERMINATED BY '#{options[:lines_terminated_by]}' " if options[:lines_terminated_by]
40
+ sql << "IGNORE #{options[:ignore_lines]} LINES " if options[:ignore_lines]
41
+ sql << "(" + options[:columns].join(', ') + ") " if options[:columns]
42
+ if options[:mapping]
43
+ mappings = []
44
+ options[:mapping].each_pair do |column, mapping|
45
+ mappings << "#{column} = #{mapping}"
46
+ end
47
+ sql << "SET #{mappings.join(', ')} " if mappings.size > 0
48
+ end
49
+ sql << ";"
50
+ connection.execute(sql)
51
+ end
52
+ end
53
53
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jsuchal-activerecord-fast-import
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jan Suchal
@@ -40,12 +40,12 @@ extensions: []
40
40
 
41
41
  extra_rdoc_files:
42
42
  - LICENSE
43
- - README.rdoc
43
+ - README.markdown
44
44
  files:
45
45
  - .document
46
46
  - .gitignore
47
47
  - LICENSE
48
- - README.rdoc
48
+ - README.markdown
49
49
  - Rakefile
50
50
  - VERSION
51
51
  - activerecord-fast-import.gemspec
data/README.rdoc DELETED
@@ -1,5 +0,0 @@
1
- = activerecord-fast-import
2
-
3
- == Copyright
4
-
5
- Copyright (c) 2009 Jan Suchal. See LICENSE for details.