usda-nutrient-database 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 55eaf87a81739b7b4da9d592c67eca5fcd857801
4
- data.tar.gz: e42ac18382b777910c4cbb6ca0546a48aaa3a48f
3
+ metadata.gz: 35f7dffdf433c3c075779ed74ffc85775784d873
4
+ data.tar.gz: 1290cbc6ab3f2abedf992dd1605e44132fe308a0
5
5
  SHA512:
6
- metadata.gz: 616a437099608f3b36179de590f12b81b97e1996e926a8d83b75118175508e51147db803560792a8b572f69252be0352b7c23e5e6f338fd30136c73009a330f3
7
- data.tar.gz: dedcb6ef862b1034fa15086f76e8059ff689a1c6f06f626392cc0790761541c22bdec009a922034e8dd184ccda6abb1b36117689242aa293fbdddac219448b85
6
+ metadata.gz: 0e2db9e3b9a216a9d0c3f1bebc8f0f9a642f1c9d7bbe0c970a70a2a6481159b14e465f39d1861966eed9e7420c909519e7b8b08e24f4b1fd87b70c2d2bb5f5be
7
+ data.tar.gz: f9319fd8b497e4df8b827d51bf2a69b0ffafb37b6e41ac24ac239dc540828830791bfc8edbd07b1fceeca610d91a826b053abd727c2541a98f9b4a7c67a7ac92
@@ -1,5 +1,10 @@
1
1
  # Changelog
2
2
 
3
+ ## 2.1.0
4
+
5
+ - Made it work with `activerecord-import` gem to speed up import times by 30x
6
+ [#16](https://github.com/mattbeedle/usda-nutrient-database/pull/16)
7
+
3
8
  ## 2.0.0
4
9
 
5
10
  - Removed IDs from tables which don't have them in USDA data. Instead opted to
data/README.md CHANGED
@@ -47,8 +47,43 @@ If you're using rails then copy the migrations across:
47
47
  rake usda_nutrient_database_engine:install:migrations
48
48
  ```
49
49
 
50
+ ## Configuration
51
+
52
+ ```ruby
53
+ UsdaNutrientDatabase.configure do |config|
54
+ config.batch_size = 20000 # import batch size, if using activerecord-import
55
+ config.perform_logging = true # default false
56
+ config.logger = Rails.logger # default Logger.new(STDOUT)
57
+ config.usda_version = 'sr25' # default sr28
58
+ end
59
+ ```
60
+
50
61
  ## Usage
51
62
 
63
+ ### Importing with UPSERT (takes around 2 minutes)
64
+
65
+ If you are running MySQL >= 5.6 or PostgreSQL >= 9.5 then you're in luck, you
66
+ can use UPSERT (insert or update) to speed up imports 30x. To do this you're
67
+ going to need to install
68
+ [activerecord-import](https://github.com/zdennis/activerecord-import)
69
+
70
+ ```ruby
71
+ require 'activerecord-import/base'
72
+ ActiveRecord::Import.require_adapter('postgresql')
73
+
74
+ # You may want to disable logging during this process to avoid dumping huge SQL
75
+ # strings into your logs
76
+ ActiveRecord::Base.logger = Logger.new('/dev/null')
77
+ ```
78
+
79
+ Now run the rake task which will import everything in around 2 minutes.
80
+ ```
81
+ rake usda:import
82
+ ```
83
+
84
+
85
+ ### Importing without UPSERT (takes 60+ minutes)
86
+
52
87
  Import the latest data with the import task:
53
88
  ```
54
89
  rake usda:import
@@ -60,6 +95,8 @@ tasks:
60
95
  rake -T usda
61
96
  ```
62
97
 
98
+ ### Models
99
+
63
100
  Use the models to query and profit:
64
101
  ```
65
102
  UsdaNutrientDatabase::FoodGroup
@@ -32,10 +32,18 @@ module UsdaNutrientDatabase
32
32
  end
33
33
  end
34
34
 
35
+ def activerecord_import?
36
+ defined?(ActiveRecord::Import)
37
+ end
38
+
35
39
  def configuration
36
40
  @configuration ||= UsdaNutrientDatabase::Configuration.new
37
41
  end
38
42
 
43
+ def batch_size
44
+ @batch_fize ||= configuration.batch_size
45
+ end
46
+
39
47
  def usda_version
40
48
  @usda_version ||= configuration.usda_version
41
49
  end
@@ -1,9 +1,14 @@
1
1
  module UsdaNutrientDatabase
2
2
  class Configuration
3
3
  attr_accessor :logger
4
- attr_writer :perform_logging,
4
+ attr_writer :batch_size,
5
+ :perform_logging,
5
6
  :usda_version
6
7
 
8
+ def batch_size
9
+ @batch_size ||= 10000
10
+ end
11
+
7
12
  def logger
8
13
  @logger ||= Logger.new(STDOUT)
9
14
  end
@@ -5,24 +5,35 @@ module UsdaNutrientDatabase
5
5
  class Base
6
6
  def initialize(directory)
7
7
  @directory = directory
8
+ @objects_to_import = []
8
9
  end
9
10
 
10
11
  def import
11
12
  log_import_started
12
13
  CSV.open(file_location, 'r:iso-8859-1:utf-8', csv_options) do |csv|
13
- csv.each { |row| extract_row(row) }
14
+ csv.each { |row| objects_to_import << extract_row(row) }
14
15
  end
16
+ ar_import? ? save_objects : objects_to_import.each(&:save)
15
17
  end
16
18
 
17
19
  private
18
20
 
19
- attr_reader :directory
21
+ attr_reader :directory,
22
+ :objects_to_import
23
+
24
+ def ar_import?
25
+ UsdaNutrientDatabase.activerecord_import?
26
+ end
20
27
 
21
28
  def extract_row(row)
22
- build_object(apply_typecasts(row)).save
29
+ build_object(apply_typecasts(row))
23
30
  end
24
31
 
25
32
  def build_object(row)
33
+ ar_import? ? build_values_array(row) : build_ar_object(row)
34
+ end
35
+
36
+ def build_ar_object(row)
26
37
  find_or_initialize(row).tap do |object|
27
38
  columns.each_with_index do |column, index|
28
39
  object.send("#{column}=", row[index])
@@ -30,6 +41,10 @@ module UsdaNutrientDatabase
30
41
  end
31
42
  end
32
43
 
44
+ def build_values_array(row)
45
+ columns.map.with_index { |_, index| row[index] }
46
+ end
47
+
33
48
  def columns
34
49
  raise NotImplementedError
35
50
  end
@@ -53,6 +68,10 @@ module UsdaNutrientDatabase
53
68
  def csv_options
54
69
  { col_sep: '^', quote_char: '~' }
55
70
  end
71
+
72
+ def save_objects
73
+ raise NotImplementedError
74
+ end
56
75
  end
57
76
  end
58
77
  end
@@ -19,6 +19,16 @@ module UsdaNutrientDatabase
19
19
  def log_import_started
20
20
  UsdaNutrientDatabase.log 'Importing food groups'
21
21
  end
22
+
23
+ def save_objects
24
+ UsdaNutrientDatabase::FoodGroup.import(columns, objects_to_import, {
25
+ validate: false,
26
+ on_duplicate_key_update: {
27
+ conflict_target: :code,
28
+ columns: %i(description)
29
+ }
30
+ })
31
+ end
22
32
  end
23
33
  end
24
34
  end
@@ -32,6 +32,16 @@ module UsdaNutrientDatabase
32
32
  :protein_factor, :fat_factor, :carbohydrate_factor
33
33
  ]
34
34
  end
35
+
36
+ def save_objects
37
+ UsdaNutrientDatabase::Food.import(columns, objects_to_import, {
38
+ validate: false,
39
+ on_duplicate_key_update: {
40
+ conflict_target: :nutrient_databank_number,
41
+ columns: columns
42
+ }
43
+ })
44
+ end
35
45
  end
36
46
  end
37
47
  end
@@ -4,6 +4,11 @@ module UsdaNutrientDatabase
4
4
 
5
5
  private
6
6
 
7
+ def apply_typecasts(row)
8
+ row[8] = row[8] == 'Y'
9
+ row
10
+ end
11
+
7
12
  def columns
8
13
  [
9
14
  :nutrient_databank_number, :nutrient_number, :nutrient_value,
@@ -15,8 +20,9 @@ module UsdaNutrientDatabase
15
20
  end
16
21
 
17
22
  def find_or_initialize(row)
18
- UsdaNutrientDatabase::FoodsNutrient.find_or_initialize_by(
19
- nutrient_databank_number: row[0], nutrient_number: row[1]
23
+ UsdaNutrientDatabase::FoodsNutrient.new(
24
+ nutrient_databank_number: row[0],
25
+ nutrient_number: row[3]
20
26
  )
21
27
  end
22
28
 
@@ -27,6 +33,26 @@ module UsdaNutrientDatabase
27
33
  def log_import_started
28
34
  UsdaNutrientDatabase.log 'Importing foods_nutrients'
29
35
  end
36
+
37
+ def save_objects
38
+ options = {
39
+ batch_size: UsdaNutrientDatabase.batch_size,
40
+ validate: false
41
+ }
42
+ if UsdaNutrientDatabase::FoodsNutrient.exists?
43
+ options.merge!(
44
+ on_duplicate_key_update: {
45
+ conflict_target: %i(nutrient_databank_number nutrient_number),
46
+ columns: columns
47
+ }
48
+ )
49
+ end
50
+ UsdaNutrientDatabase::FoodsNutrient.import(
51
+ columns,
52
+ objects_to_import,
53
+ options
54
+ )
55
+ end
30
56
  end
31
57
  end
32
58
  end
@@ -25,6 +25,16 @@ module UsdaNutrientDatabase
25
25
  def log_import_started
26
26
  UsdaNutrientDatabase.log 'Importing footnotes'
27
27
  end
28
+
29
+ def save_objects
30
+ UsdaNutrientDatabase::Footnote.import(columns, objects_to_import, {
31
+ validate: false,
32
+ on_duplicate_key_update: {
33
+ conflict_target: %i(nutrient_databank_number nutrient_number footnote_number),
34
+ columns: columns
35
+ }
36
+ })
37
+ end
28
38
  end
29
39
  end
30
40
  end
@@ -23,6 +23,16 @@ module UsdaNutrientDatabase
23
23
  def log_import_started
24
24
  UsdaNutrientDatabase.log 'Importing nutrients'
25
25
  end
26
+
27
+ def save_objects
28
+ UsdaNutrientDatabase::Nutrient.import(columns, objects_to_import, {
29
+ validate: false,
30
+ on_duplicate_key_update: {
31
+ conflict_target: :nutrient_number,
32
+ columns: columns
33
+ }
34
+ })
35
+ end
26
36
  end
27
37
  end
28
38
  end
@@ -13,12 +13,22 @@ module UsdaNutrientDatabase
13
13
  end
14
14
 
15
15
  def log_import_started
16
- UsdaNutrientDatabase.log 'Source code import started'
16
+ UsdaNutrientDatabase.log 'Import source codes'
17
17
  end
18
18
 
19
19
  def filename
20
20
  'SRC_CD.txt'
21
21
  end
22
+
23
+ def save_objects
24
+ UsdaNutrientDatabase::SourceCode.import(columns, objects_to_import, {
25
+ validate: false,
26
+ on_duplicate_key_update: {
27
+ conflict_target: :code,
28
+ columns: columns
29
+ }
30
+ })
31
+ end
22
32
  end
23
33
  end
24
34
  end
@@ -26,6 +26,16 @@ module UsdaNutrientDatabase
26
26
  def log_import_started
27
27
  UsdaNutrientDatabase.log 'Importing weights'
28
28
  end
29
+
30
+ def save_objects
31
+ UsdaNutrientDatabase::Weight.import(columns, objects_to_import, {
32
+ validate: false,
33
+ on_duplicate_key_update: {
34
+ conflict_target: %i(nutrient_databank_number sequence_number),
35
+ columns: columns
36
+ }
37
+ })
38
+ end
29
39
  end
30
40
  end
31
41
  end
@@ -1,3 +1,3 @@
1
1
  module UsdaNutrientDatabase
2
- VERSION = '2.0.0'
2
+ VERSION = '2.1.0'
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: usda-nutrient-database
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.0
4
+ version: 2.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Matt Beedle