usda-nutrient-database 2.0.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +37 -0
- data/lib/usda-nutrient-database.rb +8 -0
- data/lib/usda_nutrient_database/configuration.rb +6 -1
- data/lib/usda_nutrient_database/import/base.rb +22 -3
- data/lib/usda_nutrient_database/import/food_groups.rb +10 -0
- data/lib/usda_nutrient_database/import/foods.rb +10 -0
- data/lib/usda_nutrient_database/import/foods_nutrients.rb +28 -2
- data/lib/usda_nutrient_database/import/footnotes.rb +10 -0
- data/lib/usda_nutrient_database/import/nutrients.rb +10 -0
- data/lib/usda_nutrient_database/import/source_codes.rb +11 -1
- data/lib/usda_nutrient_database/import/weights.rb +10 -0
- data/lib/usda_nutrient_database/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 35f7dffdf433c3c075779ed74ffc85775784d873
|
4
|
+
data.tar.gz: 1290cbc6ab3f2abedf992dd1605e44132fe308a0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0e2db9e3b9a216a9d0c3f1bebc8f0f9a642f1c9d7bbe0c970a70a2a6481159b14e465f39d1861966eed9e7420c909519e7b8b08e24f4b1fd87b70c2d2bb5f5be
|
7
|
+
data.tar.gz: f9319fd8b497e4df8b827d51bf2a69b0ffafb37b6e41ac24ac239dc540828830791bfc8edbd07b1fceeca610d91a826b053abd727c2541a98f9b4a7c67a7ac92
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,10 @@
|
|
1
1
|
# Changelog
|
2
2
|
|
3
|
+
## 2.1.0
|
4
|
+
|
5
|
+
- Made it work with `activerecord-import` gem to speed up import times by 30x
|
6
|
+
[#16](https://github.com/mattbeedle/usda-nutrient-database/pull/16)
|
7
|
+
|
3
8
|
## 2.0.0
|
4
9
|
|
5
10
|
- Removed IDs from tables which don't have them in USDA data. Instead opted to
|
data/README.md
CHANGED
@@ -47,8 +47,43 @@ If you're using rails then copy the migrations across:
|
|
47
47
|
rake usda_nutrient_database_engine:install:migrations
|
48
48
|
```
|
49
49
|
|
50
|
+
## Configuration
|
51
|
+
|
52
|
+
```ruby
|
53
|
+
UsdaNutrientDatabase.configure do |config|
|
54
|
+
config.batch_size = 20000 # import batch size, if using activerecord-import
|
55
|
+
config.perform_logging = true # default false
|
56
|
+
config.logger = Rails.logger # default Logger.new(STDOUT)
|
57
|
+
config.usda_version = 'sr25' # default sr28
|
58
|
+
end
|
59
|
+
```
|
60
|
+
|
50
61
|
## Usage
|
51
62
|
|
63
|
+
### Importing with UPSERT (takes around 2 minutes)
|
64
|
+
|
65
|
+
If you are running MySQL >= 5.6 or PostgreSQL >= 9.5 then you're in luck, you
|
66
|
+
can use UPSERT (insert or update) to speed up imports x30. To do this you're
|
67
|
+
going to need to install
|
68
|
+
[activerecord-import](https://github.com/zdennis/activerecord-import)
|
69
|
+
|
70
|
+
```ruby
|
71
|
+
require 'activerecord-import/base'
|
72
|
+
ActiveRecord::Import.require_adapter('postgresql')
|
73
|
+
|
74
|
+
# You may want to disable logging during this process to avoid dumping huge SQL
|
75
|
+
# strings into your logs
|
76
|
+
ActiveRecord::Base.logger = Logger.new('/dev/null')
|
77
|
+
```
|
78
|
+
|
79
|
+
Now run the rake task which will import everything in around 2 minutes.
|
80
|
+
```
|
81
|
+
rake usda:import
|
82
|
+
```
|
83
|
+
|
84
|
+
|
85
|
+
### Importing without UPSERT (takes 60+ minutes)
|
86
|
+
|
52
87
|
Import the latest data with the import task:
|
53
88
|
```
|
54
89
|
rake usda:import
|
@@ -60,6 +95,8 @@ tasks:
|
|
60
95
|
rake -T usda
|
61
96
|
```
|
62
97
|
|
98
|
+
### Models
|
99
|
+
|
63
100
|
Use the models to query and profit:
|
64
101
|
```
|
65
102
|
UsdaNutrientDatabase::FoodGroup
|
@@ -32,10 +32,18 @@ module UsdaNutrientDatabase
|
|
32
32
|
end
|
33
33
|
end
|
34
34
|
|
35
|
+
def activerecord_import?
|
36
|
+
defined?(ActiveRecord::Import)
|
37
|
+
end
|
38
|
+
|
35
39
|
def configuration
|
36
40
|
@configuration ||= UsdaNutrientDatabase::Configuration.new
|
37
41
|
end
|
38
42
|
|
43
|
+
def batch_size
|
44
|
+
@batch_size ||= configuration.batch_size
|
45
|
+
end
|
46
|
+
|
39
47
|
def usda_version
|
40
48
|
@usda_version ||= configuration.usda_version
|
41
49
|
end
|
@@ -1,9 +1,14 @@
|
|
1
1
|
module UsdaNutrientDatabase
|
2
2
|
class Configuration
|
3
3
|
attr_accessor :logger
|
4
|
-
attr_writer
|
4
|
+
attr_writer :batch_size,
|
5
|
+
:perform_logging,
|
5
6
|
:usda_version
|
6
7
|
|
8
|
+
def batch_size
|
9
|
+
@batch_size ||= 10000
|
10
|
+
end
|
11
|
+
|
7
12
|
def logger
|
8
13
|
@logger ||= Logger.new(STDOUT)
|
9
14
|
end
|
@@ -5,24 +5,35 @@ module UsdaNutrientDatabase
|
|
5
5
|
class Base
|
6
6
|
def initialize(directory)
|
7
7
|
@directory = directory
|
8
|
+
@objects_to_import = []
|
8
9
|
end
|
9
10
|
|
10
11
|
def import
|
11
12
|
log_import_started
|
12
13
|
CSV.open(file_location, 'r:iso-8859-1:utf-8', csv_options) do |csv|
|
13
|
-
csv.each { |row| extract_row(row) }
|
14
|
+
csv.each { |row| objects_to_import << extract_row(row) }
|
14
15
|
end
|
16
|
+
ar_import? ? save_objects : objects_to_import.each(&:save)
|
15
17
|
end
|
16
18
|
|
17
19
|
private
|
18
20
|
|
19
|
-
attr_reader :directory
|
21
|
+
attr_reader :directory,
|
22
|
+
:objects_to_import
|
23
|
+
|
24
|
+
def ar_import?
|
25
|
+
UsdaNutrientDatabase.activerecord_import?
|
26
|
+
end
|
20
27
|
|
21
28
|
def extract_row(row)
|
22
|
-
build_object(apply_typecasts(row))
|
29
|
+
build_object(apply_typecasts(row))
|
23
30
|
end
|
24
31
|
|
25
32
|
def build_object(row)
|
33
|
+
ar_import? ? build_values_array(row) : build_ar_object(row)
|
34
|
+
end
|
35
|
+
|
36
|
+
def build_ar_object(row)
|
26
37
|
find_or_initialize(row).tap do |object|
|
27
38
|
columns.each_with_index do |column, index|
|
28
39
|
object.send("#{column}=", row[index])
|
@@ -30,6 +41,10 @@ module UsdaNutrientDatabase
|
|
30
41
|
end
|
31
42
|
end
|
32
43
|
|
44
|
+
def build_values_array(row)
|
45
|
+
columns.map.with_index { |_, index| row[index] }
|
46
|
+
end
|
47
|
+
|
33
48
|
def columns
|
34
49
|
raise NotImplementedError
|
35
50
|
end
|
@@ -53,6 +68,10 @@ module UsdaNutrientDatabase
|
|
53
68
|
def csv_options
|
54
69
|
{ col_sep: '^', quote_char: '~' }
|
55
70
|
end
|
71
|
+
|
72
|
+
def save_objects
|
73
|
+
raise NotImplementedError
|
74
|
+
end
|
56
75
|
end
|
57
76
|
end
|
58
77
|
end
|
@@ -19,6 +19,16 @@ module UsdaNutrientDatabase
|
|
19
19
|
def log_import_started
|
20
20
|
UsdaNutrientDatabase.log 'Importing food groups'
|
21
21
|
end
|
22
|
+
|
23
|
+
def save_objects
|
24
|
+
UsdaNutrientDatabase::FoodGroup.import(columns, objects_to_import, {
|
25
|
+
validate: false,
|
26
|
+
on_duplicate_key_update: {
|
27
|
+
conflict_target: :code,
|
28
|
+
columns: %i(description)
|
29
|
+
}
|
30
|
+
})
|
31
|
+
end
|
22
32
|
end
|
23
33
|
end
|
24
34
|
end
|
@@ -32,6 +32,16 @@ module UsdaNutrientDatabase
|
|
32
32
|
:protein_factor, :fat_factor, :carbohydrate_factor
|
33
33
|
]
|
34
34
|
end
|
35
|
+
|
36
|
+
def save_objects
|
37
|
+
UsdaNutrientDatabase::Food.import(columns, objects_to_import, {
|
38
|
+
validate: false,
|
39
|
+
on_duplicate_key_update: {
|
40
|
+
conflict_target: :nutrient_databank_number,
|
41
|
+
columns: columns
|
42
|
+
}
|
43
|
+
})
|
44
|
+
end
|
35
45
|
end
|
36
46
|
end
|
37
47
|
end
|
@@ -4,6 +4,11 @@ module UsdaNutrientDatabase
|
|
4
4
|
|
5
5
|
private
|
6
6
|
|
7
|
+
def apply_typecasts(row)
|
8
|
+
row[8] = row[8] == 'Y'
|
9
|
+
row
|
10
|
+
end
|
11
|
+
|
7
12
|
def columns
|
8
13
|
[
|
9
14
|
:nutrient_databank_number, :nutrient_number, :nutrient_value,
|
@@ -15,8 +20,9 @@ module UsdaNutrientDatabase
|
|
15
20
|
end
|
16
21
|
|
17
22
|
def find_or_initialize(row)
|
18
|
-
UsdaNutrientDatabase::FoodsNutrient.
|
19
|
-
nutrient_databank_number: row[0],
|
23
|
+
UsdaNutrientDatabase::FoodsNutrient.new(
|
24
|
+
nutrient_databank_number: row[0],
|
25
|
+
nutrient_number: row[1]
|
20
26
|
)
|
21
27
|
end
|
22
28
|
|
@@ -27,6 +33,26 @@ module UsdaNutrientDatabase
|
|
27
33
|
def log_import_started
|
28
34
|
UsdaNutrientDatabase.log 'Importing foods_nutrients'
|
29
35
|
end
|
36
|
+
|
37
|
+
def save_objects
|
38
|
+
options = {
|
39
|
+
batch_size: UsdaNutrientDatabase.batch_size,
|
40
|
+
validate: false
|
41
|
+
}
|
42
|
+
if UsdaNutrientDatabase::FoodsNutrient.exists?
|
43
|
+
options.merge!(
|
44
|
+
on_duplicate_key_update: {
|
45
|
+
conflict_target: %i(nutrient_databank_number nutrient_number),
|
46
|
+
columns: columns
|
47
|
+
}
|
48
|
+
)
|
49
|
+
end
|
50
|
+
UsdaNutrientDatabase::FoodsNutrient.import(
|
51
|
+
columns,
|
52
|
+
objects_to_import,
|
53
|
+
options
|
54
|
+
)
|
55
|
+
end
|
30
56
|
end
|
31
57
|
end
|
32
58
|
end
|
@@ -25,6 +25,16 @@ module UsdaNutrientDatabase
|
|
25
25
|
def log_import_started
|
26
26
|
UsdaNutrientDatabase.log 'Importing footnotes'
|
27
27
|
end
|
28
|
+
|
29
|
+
def save_objects
|
30
|
+
UsdaNutrientDatabase::Footnote.import(columns, objects_to_import, {
|
31
|
+
validate: false,
|
32
|
+
on_duplicate_key_update: {
|
33
|
+
conflict_target: %i(nutrient_databank_number nutrient_number footnote_number),
|
34
|
+
columns: columns
|
35
|
+
}
|
36
|
+
})
|
37
|
+
end
|
28
38
|
end
|
29
39
|
end
|
30
40
|
end
|
@@ -23,6 +23,16 @@ module UsdaNutrientDatabase
|
|
23
23
|
def log_import_started
|
24
24
|
UsdaNutrientDatabase.log 'Importing nutrients'
|
25
25
|
end
|
26
|
+
|
27
|
+
def save_objects
|
28
|
+
UsdaNutrientDatabase::Nutrient.import(columns, objects_to_import, {
|
29
|
+
validate: false,
|
30
|
+
on_duplicate_key_update: {
|
31
|
+
conflict_target: :nutrient_number,
|
32
|
+
columns: columns
|
33
|
+
}
|
34
|
+
})
|
35
|
+
end
|
26
36
|
end
|
27
37
|
end
|
28
38
|
end
|
@@ -13,12 +13,22 @@ module UsdaNutrientDatabase
|
|
13
13
|
end
|
14
14
|
|
15
15
|
def log_import_started
|
16
|
-
UsdaNutrientDatabase.log '
|
16
|
+
UsdaNutrientDatabase.log 'Import source codes'
|
17
17
|
end
|
18
18
|
|
19
19
|
def filename
|
20
20
|
'SRC_CD.txt'
|
21
21
|
end
|
22
|
+
|
23
|
+
def save_objects
|
24
|
+
UsdaNutrientDatabase::SourceCode.import(columns, objects_to_import, {
|
25
|
+
validate: false,
|
26
|
+
on_duplicate_key_update: {
|
27
|
+
conflict_target: :code,
|
28
|
+
columns: columns
|
29
|
+
}
|
30
|
+
})
|
31
|
+
end
|
22
32
|
end
|
23
33
|
end
|
24
34
|
end
|
@@ -26,6 +26,16 @@ module UsdaNutrientDatabase
|
|
26
26
|
def log_import_started
|
27
27
|
UsdaNutrientDatabase.log 'Importing weights'
|
28
28
|
end
|
29
|
+
|
30
|
+
def save_objects
|
31
|
+
UsdaNutrientDatabase::Weight.import(columns, objects_to_import, {
|
32
|
+
validate: false,
|
33
|
+
on_duplicate_key_update: {
|
34
|
+
conflict_target: %i(nutrient_databank_number sequence_number),
|
35
|
+
columns: columns
|
36
|
+
}
|
37
|
+
})
|
38
|
+
end
|
29
39
|
end
|
30
40
|
end
|
31
41
|
end
|