contentful-importer 0.0.2 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,432 @@
1
+ require_relative 'mime_content_type'
2
+ require_relative 'data_organizer'
3
+ require 'contentful/management'
4
+ require 'csv'
5
+ require 'yaml'
6
+ require 'api_cache'
7
+
8
module Contentful
  module Importer
    # Imports content types, entries and assets from JSON files on disk into a
    # Contentful space through the Contentful Management API, and publishes
    # them afterwards. Entries and assets are processed by several Ruby threads,
    # one per numbered sub-directory of +config.threads_dir+.
    #
    # Progress is tracked in CSV files under the log directory so an
    # interrupted run can be resumed without re-importing finished items.
    class ParallelImporter
      # NOTE: this changes the process-wide default external encoding as soon
      # as the class is loaded.
      Encoding.default_external = 'utf-8'

      attr_reader :space, :config, :logger, :data_organizer
      # Only the writer is generated: the reader name is taken by the private
      # #content_type(content_type_id, space_id) lookup defined further down,
      # which would otherwise silently redefine a generated reader.
      attr_writer :content_type

      # @param settings [Object] configuration object; this class reads
      #   +config+ (a Hash with 'access_token', 'default_locale', 'space_id',
      #   'organization_id'), the *_dir paths, +space_id+, +imported_entries+,
      #   +published_entries+ and +published_assets+ from it.
      def initialize(settings)
        @config = settings
        @logger = Logger.new(STDOUT)
        @data_organizer = DataOrganizer.new(@config)
        # The client instance is not stored here.
        # NOTE(review): presumably the gem registers it globally for the
        # Contentful::Management::* calls below - confirm against the gem docs.
        Contentful::Management::Client.new(
          config.config['access_token'],
          default_locale: config.config['default_locale'] || 'en-US'
        )
      end

      # Finds (or creates) the target space and imports every content type.
      #
      # @param space [Hash] expects +:space_id+ or +:space_name+.
      # @raise [RuntimeError] when neither key is given.
      def create_contentful_model(space)
        initialize_space(space)
        import_content_types
      end

      # Re-splits the entries into +threads+ directories and imports them.
      #
      # @param threads [Integer] number of worker directories/threads.
      def import_data(threads)
        clean_threads_dir_before_import(threads)
        data_organizer.execute(threads)
        import_in_threads
      end

      # Logs whether the configured Management API credentials can list spaces.
      def test_credentials
        spaces = Contentful::Management::Space.all
        if spaces.is_a?(Contentful::Management::Array)
          logger.info 'Contentful Management API credentials: OK'
        else
          # Anything other than an Array means the listing did not succeed;
          # report it instead of finishing silently.
          logger.info 'Contentful Management API credentials: INVALID (check README)'
        end
      rescue NoMethodError => _error
        logger.info 'Contentful Management API credentials: INVALID (check README)'
      end

      # Counts the numbered worker directories directly below
      # +config.threads_dir+. Only all-digit names count, so siblings such as
      # "assets" are ignored while "10", "11", ... are included.
      #
      # @return [Integer]
      def number_of_threads
        Dir.glob("#{config.threads_dir}/*").count do |thread_path|
          File.basename(thread_path) =~ /\A\d+\z/
        end
      end

      # Imports the entries of every worker directory, one thread each, and
      # waits for all of them to finish.
      def import_in_threads
        run_in_threads(number_of_threads) do |thread_id|
          self.class.new(config).import_entries("#{config.threads_dir}/#{thread_id}", config.space_id)
        end
      end

      # Imports every "<content_type_id>_<number>.json" file found in +path+
      # that is not yet listed in a success_thread log.
      #
      # @param path [String] worker directory.
      # @param space_id [String] target space id.
      def import_entries(path, space_id)
        log_file_name = "success_thread_#{File.basename(path)}"
        create_log_file(log_file_name)
        load_log_files
        # Flatten once instead of once per file.
        already_imported = Set.new(config.imported_entries.flatten)
        Dir.glob("#{path}/*.json") do |entry_path|
          entry_file_name = File.basename(entry_path)
          next if already_imported.include?(entry_file_name)

          name_match = entry_file_name.match(/(.+)_\d+/)
          if name_match.nil?
            # Without the "<content_type_id>_<number>" shape the content type
            # cannot be derived; skip the file rather than abort the thread.
            logger.info "Skipping #{entry_file_name}: cannot derive content type id from file name."
            next
          end
          import_entry(entry_path, space_id, name_match[1], log_file_name)
        end
      end

      # Imports every asset JSON below +config.assets_dir+ whose 'url' starts
      # with "http" and whose id is not yet listed in success_assets.csv.
      def import_only_assets
        create_log_file('success_assets')
        assets_ids = Set.new(CSV.read("#{config.data_dir}/logs/success_assets.csv").flatten)
        Dir.glob("#{config.assets_dir}/**/*json") do |file_path|
          asset_attributes = JSON.parse(File.read(file_path))
          next unless asset_url_param_start_with_http?(asset_attributes)
          next unless asset_not_imported_yet?(asset_attributes, assets_ids)

          import_asset(asset_attributes)
        end
      end

      # Creates a single asset in the configured space and records the result.
      #
      # @param asset_attributes [Hash] parsed asset JSON ('id', 'name',
      #   'description', 'url', 'contentType').
      def import_asset(asset_attributes)
        logger.info "Import asset - #{asset_attributes['id']} "
        asset_title = asset_attributes['name'].present? ? asset_attributes['name'] : asset_attributes['id']
        asset_description = asset_attributes['description'].present? ? asset_attributes['description'] : ''
        asset_file = create_asset_file(asset_title, asset_attributes)
        target_space = Contentful::Management::Space.find(config.config['space_id'])
        asset = target_space.assets.create(
          id: asset_attributes['id'].to_s,
          title: asset_title.to_s,
          description: asset_description,
          file: asset_file
        )
        asset_status(asset, asset_attributes)
      end

      # @return [Boolean, nil] truthy when the asset has a 'url' starting with "http".
      def asset_url_param_start_with_http?(asset_attributes)
        asset_attributes['url'] && asset_attributes['url'].start_with?('http')
      end

      # @param assets_ids [#to_a] ids (optionally nested in CSV rows) that were
      #   already imported.
      # @return [Boolean] true when the asset id is not among +assets_ids+.
      def asset_not_imported_yet?(asset_attributes, assets_ids)
        !assets_ids.to_a.flatten.include?(asset_attributes['id'])
      end

      # Builds the file description that points Contentful at the asset's URL.
      #
      # @return [Contentful::Management::File]
      def create_asset_file(asset_title, params)
        Contentful::Management::File.new.tap do |file|
          file.properties[:contentType] = file_content_type(params)
          file.properties[:fileName] = asset_title
          file.properties[:upload] = params['url']
        end
      end

      # Processes a freshly created asset and appends its id to the success
      # log, or logs the error and appends the source id to the failure log.
      def asset_status(asset, asset_attributes)
        if asset.is_a?(Contentful::Management::Asset)
          logger.info "Process asset - #{asset.id} "
          asset.process_file
          CSV.open("#{config.log_files_dir}/success_assets.csv", 'a') { |csv| csv << [asset.id] }
        else
          logger.info "Error - #{asset.message} "
          CSV.open("#{config.log_files_dir}/failure_assets.csv", 'a') { |csv| csv << [asset_attributes['id']] }
        end
      end

      # Publishes the entries of every worker directory, one thread each.
      def publish_entries_in_threads
        run_in_threads(number_of_threads) do |thread_id|
          self.class.new(config).publish_all_entries("#{config.threads_dir}/#{thread_id}")
        end
      end

      # Splits the assets into +number_of_threads+ directories and publishes
      # each directory in its own thread.
      #
      # @param number_of_threads [Integer]
      def publish_assets_in_threads(number_of_threads)
        clean_assets_threads_dir_before_publish(number_of_threads)
        data_organizer.split_assets_to_threads(number_of_threads)
        run_in_threads(number_of_threads) do |thread_id|
          self.class.new(config).publish_assets("#{config.threads_dir}/assets/#{thread_id}")
        end
      end

      # Publishes every asset described in +thread_dir+ that is not yet listed
      # in success_published_assets.csv.
      def publish_assets(thread_dir)
        create_log_file('success_published_assets')
        config.published_assets << CSV.read("#{config.log_files_dir}/success_published_assets.csv").flatten
        already_published = Set.new(config.published_assets.flatten)
        Dir.glob("#{thread_dir}/*json") do |asset_file|
          asset_id = JSON.parse(File.read(asset_file))['id']
          publish_asset(asset_id) unless already_published.include?(asset_id)
        end
      end

      # Publishes one asset and records the outcome.
      def publish_asset(asset_id)
        logger.info "Publish an Asset - ID: #{asset_id}"
        asset = Contentful::Management::Asset.find(config.config['space_id'], asset_id)
        # A failed lookup yields an error object; hand it to publish_status
        # instead of calling #publish on it.
        asset = asset.publish unless asset.is_a?(Contentful::Management::Error)
        publish_status(asset, asset_id, 'published_assets')
      end

      # Publishes every entry described in +thread_dir+ that is not yet listed
      # in success_published_entries.csv.
      def publish_all_entries(thread_dir)
        create_log_file('success_published_entries')
        config.published_entries << CSV.read("#{config.log_files_dir}/success_published_entries.csv").flatten
        already_published = Set.new(config.published_entries.flatten)
        Dir.glob("#{thread_dir}/*json") do |entry_file|
          entry_id = JSON.parse(File.read(entry_file))['id']
          publish_entry(entry_id) unless already_published.include?(entry_id)
        end
      end

      # Publishes one entry and records the outcome.
      def publish_entry(entry_id)
        logger.info "Publish entries for #{entry_id}."
        entry = Contentful::Management::Entry.find(config.config['space_id'], entry_id)
        # See #publish_asset: do not call #publish on an error object.
        entry = entry.publish unless entry.is_a?(Contentful::Management::Error)
        publish_status(entry, entry_id, 'published_entries')
      end

      private

      # Starts +count+ threads, yields each thread's index inside it and
      # blocks until all of them have finished.
      def run_in_threads(count)
        workers = Array.new(count) do |thread_id|
          Thread.new { yield thread_id }
        end
        workers.each(&:join)
      end

      # Sets @space to the existing space (+:space_id+) or a newly created one
      # (+:space_name+).
      #
      # @raise [RuntimeError] when neither key is present.
      def initialize_space(space)
        if space[:space_id].nil? && space[:space_name].nil?
          fail 'You need to specify \'--space_id\' argument to find an existing Space or \'--space_name\' to create a new Space.'
        end
        @space = space[:space_id].present? ? Contentful::Management::Space.find(space[:space_id]) : create_space(space[:space_name])
      end

      # Creates a space in the configured organization and returns it.
      def create_space(name_space)
        logger.info "Creating a space with name: #{name_space}"
        new_space = Contentful::Management::Space.create(name: name_space, organization_id: config.config['organization_id'])
        logger.info "Space was created successfully! Space id: #{new_space.id}"
        new_space
      end

      # Creates, fills and activates one content type per JSON file in
      # +config.collections_dir+.
      def import_content_types
        Dir.glob("#{config.collections_dir}/*json") do |file_path|
          collection_attributes = JSON.parse(File.read(file_path))
          new_content_type = create_new_content_type(space, collection_attributes)
          logger.info "Importing content_type: #{new_content_type.name}"
          create_content_type_fields(collection_attributes, new_content_type)
          if collection_attributes['displayField']
            new_content_type.update(displayField: collection_attributes['displayField'])
          end
          active_status(new_content_type.activate)
        end
      end

      # @return [String] last path segment of the 'id' (or, failing that, 'url').
      def get_id(params)
        File.basename(params['id'] || params['url'])
      end

      # Builds the field objects for a content type and saves it.
      # A missing 'fields' key is treated as "no fields".
      def create_content_type_fields(collection_attributes, content_type)
        content_type.fields = Array(collection_attributes['fields']).map { |field| create_field(field) }
        content_type.save
      end

      # Creates one entry from a JSON file and records the outcome.
      def import_entry(file_path, space_id, content_type_id, log_file)
        entry_attributes = JSON.parse(File.read(file_path))
        logger.info "Creating entry: #{entry_attributes['id']}."
        entry_params = create_entry_parameters(content_type_id, entry_attributes, space_id)
        entry_content_type = content_type(content_type_id, space_id)
        entry = entry_content_type.entries.create(entry_params)
        import_status(entry, file_path, log_file)
      end

      # Converts parsed entry JSON into the parameter Hash passed to
      # +entries.create+. Keys starting with "@" and nil/empty-array values
      # are left out.
      def create_entry_parameters(content_type_id, entry_attributes, space_id)
        entry_attributes.each_with_object({}) do |(attr, value), entry_params|
          next if attr.start_with?('@')

          entry_param = case value
                        when Hash then parse_attributes_from_hash(value, space_id, content_type_id)
                        when Array then parse_attributes_from_array(value, space_id, content_type_id)
                        else value
                        end
          entry_params[attr.to_sym] = entry_param unless validate_param(entry_param)
        end
      end

      # Maps a Hash value to a location, an asset or a linked entry depending
      # on its 'type'; Hashes without a 'type' are passed through unchanged.
      def parse_attributes_from_hash(params, space_id, content_type_id)
        case params['type']
        when nil, false then params
        when 'Location' then create_location_file(params)
        when 'File' then create_asset(space_id, params)
        else create_entry(params, space_id, content_type_id)
        end
      end

      # Maps each array element to an asset ('File'), a linked entry (any other
      # non-blank 'type') or itself. Only Hash elements are inspected for a
      # 'type'; plain strings and numbers are passed through as-is.
      def parse_attributes_from_array(params, space_id, content_type_id)
        params.each_with_object([]) do |attr, array_attributes|
          type = attr.is_a?(Hash) ? attr['type'] : nil
          value = if type == 'File'
                    create_asset(space_id, attr)
                  elsif type.present?
                    create_entry(attr, space_id, content_type_id)
                  else
                    attr
                  end
          array_attributes << value unless value.nil?
        end
      end

      # Appends the entry's file name to the thread's success log, or the
      # failure details to the matching failure_thread_<n>.csv.
      def import_status(entry, file_path, log_file)
        if entry.is_a? Contentful::Management::Entry
          entry_file_name = File.basename(file_path)
          logger.info 'Imported successfully!'
          CSV.open("#{config.log_files_dir}/#{log_file}.csv", 'a') { |csv| csv << [entry_file_name] }
        else
          logger.info "### Failure! - #{entry.message} - #{entry.response.raw}###"
          # \d+ keeps multi-digit thread ids intact; fall back to the full log
          # name when it carries no thread id at all.
          failure_filename = log_file[/thread_\d+/] || log_file
          CSV.open("#{config.log_files_dir}/failure_#{failure_filename}.csv", 'a') { |csv| csv << [file_path, entry.message, entry.response.raw] }
        end
      end

      # Looks up a content type through APICache and remembers the result in
      # @content_type.
      # NOTE(review): the meaning of the negative :period is not visible in
      # this file - confirm against the api_cache documentation.
      def content_type(content_type_id, space_id)
        @content_type = APICache.get("content_type_#{content_type_id}", period: -5) do
          Contentful::Management::ContentType.find(space_id, content_type_id)
        end
      end

      # Builds an unsaved entry that only carries the id taken from +params+.
      def create_entry(params, space_id, content_type_id)
        entry_id = get_id(params)
        content_type(content_type_id, space_id).entries.new.tap do |entry|
          entry.id = entry_id
        end
      end

      # Returns the existing asset with +params['id']+ or, when it cannot be
      # found, an unsaved asset carrying just that id.
      #
      # @return [Contentful::Management::Asset, nil] nil when +params+ has no 'id'.
      def create_asset(space_id, params)
        return unless params['id']

        found_asset = Contentful::Management::Space.find(space_id).assets.find(params['id'])
        found_asset.is_a?(Contentful::Management::Asset) ? found_asset : initialize_asset_file(params)
      end

      # Builds an unsaved asset that carries only an id and link type 'Asset'.
      def initialize_asset_file(params)
        Contentful::Management::Asset.new.tap do |asset|
          asset.id = params['id']
          asset.link_type = 'Asset'
        end
      end

      # Builds a location from the 'lat' and 'lng' keys of +params+.
      def create_location_file(params)
        Contentful::Management::Location.new.tap do |location|
          location.lat = params['lat']
          location.lon = params['lng']
        end
      end

      # Builds one content type field from its JSON description.
      def create_field(field)
        field_params = { id: field['id'], name: field['name'], required: field['required'] }
        field_params.merge!(additional_field_params(field))
        logger.info "Creating field: #{field_params[:type]}"
        create_content_type_field(field_params)
      end

      # Copies the prepared parameters onto a new field object.
      def create_content_type_field(field_params)
        Contentful::Management::Field.new.tap do |field|
          field.id = field_params[:id]
          field.name = field_params[:name]
          field.type = field_params[:type]
          field.link_type = field_params[:link_type]
          field.required = field_params[:required]
          field.items = field_params[:items]
        end
      end

      # Logs the outcome of a content type activation.
      def active_status(ct_object)
        if ct_object.is_a? Contentful::Management::Error
          logger.info "### Failure! - #{ct_object.message} - #{ct_object.response.raw} ###"
        else
          logger.info 'Successfully activated!'
        end
      end

      # Logs the outcome of a publish call and appends it to
      # failure_<log_file_name>.csv or success_<log_file_name>.csv.
      def publish_status(ct_object, object_id, log_file_name)
        if ct_object.is_a? Contentful::Management::Error
          logger.info "### Failure! - #{ct_object.message} - #{ct_object.response.raw} ###"
          CSV.open("#{config.log_files_dir}/failure_#{log_file_name}.csv", 'a') { |csv| csv << [object_id, ct_object.message, ct_object.response.raw] }
        else
          logger.info 'Successfully activated!'
          CSV.open("#{config.log_files_dir}/success_#{log_file_name}.csv", 'a') { |csv| csv << [ct_object.id] }
        end
      end

      # Translates the JSON field 'type' into field parameters: 'Entry' and
      # 'Asset' become links, 'Array' gets an items definition, everything
      # else is used verbatim.
      #
      # @return [Hash]
      def additional_field_params(field)
        field_type = field['type']
        case field_type
        when 'Entry', 'Asset' then { type: 'Link', link_type: field_type }
        when 'Array' then { type: 'Array', items: create_array_field(field) }
        else { type: field_type }
        end
      end

      # @return [Boolean] true when +param+ should be left out of the entry
      #   parameters (nil, or an empty Array).
      def validate_param(param)
        param.is_a?(Array) ? param.empty? : param.nil?
      end

      # Builds an unsaved content type with id, name and description.
      def create_new_content_type(space, collection_attributes)
        space.content_types.new.tap do |content_type|
          content_type.id = collection_attributes['id']
          content_type.name = collection_attributes['name']
          content_type.description = collection_attributes['description']
        end
      end

      # @return [String, nil] the explicit 'contentType', or the one looked up
      #   in MimeContentType::EXTENSION_LIST by the URL's file extension.
      def file_content_type(params)
        params['contentType'].present? ? params['contentType'] : MimeContentType::EXTENSION_LIST[File.extname(params['url'])]
      end

      # @return [String] +item+ rendered as pretty-printed JSON.
      def format_json(item)
        JSON.pretty_generate(JSON.parse(item.to_json))
      end

      # Builds the items definition of an Array field.
      def create_array_field(params)
        Contentful::Management::Field.new.tap do |field|
          field.type = params['link'] || 'Link'
          field.link_type = params['link_type']
        end
      end

      # Removes the worker directories 0...threads left over from a previous import.
      def clean_threads_dir_before_import(threads)
        threads.times do |thread|
          thread_dir = "#{config.threads_dir}/#{thread}"
          next unless File.directory?(thread_dir)

          logger.info "Remove directory threads/#{thread} from #{config.threads_dir} path."
          FileUtils.rm_r(thread_dir)
        end
      end

      # Removes the asset worker directories 0...threads left over from a
      # previous publish run.
      def clean_assets_threads_dir_before_publish(threads)
        threads.times do |thread|
          thread_dir = "#{config.threads_dir}/assets/#{thread}"
          next unless File.directory?(thread_dir)

          logger.info "Remove directory threads/#{thread} from #{config.threads_dir}/assets path."
          FileUtils.rm_r(thread_dir)
        end
      end

      # Creates +path+ (including parents) unless it already is a directory.
      def create_directory(path)
        FileUtils.mkdir_p(path) unless File.directory?(path)
      end

      # Makes sure "<data_dir>/logs/<path>.csv" exists without truncating it.
      def create_log_file(path)
        create_directory("#{config.data_dir}/logs")
        File.open("#{config.data_dir}/logs/#{path}.csv", 'a') { |file| file.write('') }
      end

      # Appends the contents of every success_thread*.csv log to
      # +config.imported_entries+ (once per distinct content).
      def load_log_files
        Dir.glob("#{config.log_files_dir}/*.csv") do |log_file|
          # Only the import success logs are relevant; do not parse the others.
          next unless File.basename(log_file).start_with?('success_thread')

          imported_ids = CSV.read(log_file).flatten
          config.imported_entries << imported_ids unless config.imported_entries.include?(imported_ids)
        end
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
module Contentful
  module Importer
    # Gem version; frozen so the shared constant string cannot be mutated in place.
    VERSION = '0.1.0'.freeze
  end
end
@@ -1,22 +1,24 @@
1
1
  require 'spec_helper'
2
- require './lib/configuration'
2
+ require './lib/contentful/importer/configuration'
3
3
 
4
4
  module Contentful
5
- describe Configuration do
5
+ module Importer
6
+ describe Configuration do
6
7
 
7
- include_context 'shared_configuration'
8
+ include_context 'shared_configuration'
8
9
 
9
- it 'initialize' do
10
- expect(@config.data_dir).to eq 'spec/fixtures/import_files'
11
- expect(@config.collections_dir).to eq 'spec/fixtures/import_files/collections'
12
- expect(@config.assets_dir).to eq 'spec/fixtures/import_files/assets'
13
- expect(@config.entries_dir).to eq 'spec/fixtures/import_files/entries'
14
- expect(@config.log_files_dir).to eq 'spec/fixtures/import_files/logs'
15
- expect(@config.threads_dir).to eq 'spec/fixtures/import_files/threads'
16
- expect(@config.imported_entries).to be_empty
17
- expect(@config.published_entries).to be_empty
18
- expect(@config.published_assets).to be_empty
19
- expect(@config.space_id).to eq 'ip17s12q0ek4'
10
+ it 'initialize' do
11
+ expect(@config.data_dir).to eq 'spec/fixtures/import_files'
12
+ expect(@config.collections_dir).to eq 'spec/fixtures/import_files/collections'
13
+ expect(@config.assets_dir).to eq 'spec/fixtures/import_files/assets'
14
+ expect(@config.entries_dir).to eq 'spec/fixtures/import_files/entries'
15
+ expect(@config.log_files_dir).to eq 'spec/fixtures/import_files/logs'
16
+ expect(@config.threads_dir).to eq 'spec/fixtures/import_files/threads'
17
+ expect(@config.imported_entries).to be_empty
18
+ expect(@config.published_entries).to be_empty
19
+ expect(@config.published_assets).to be_empty
20
+ expect(@config.space_id).to eq 'ip17s12q0ek4'
21
+ end
20
22
  end
21
23
  end
22
24
  end