contentful-importer 0.0.2 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,432 @@
1
+ require_relative 'mime_content_type'
2
+ require_relative 'data_organizer'
3
+ require 'contentful/management'
4
+ require 'csv'
5
+ require 'yaml'
6
+ require 'api_cache'
7
+
8
module Contentful
  module Importer
    # Imports content types, entries and assets described by JSON files on
    # disk into a Contentful space through the Contentful Management API, and
    # publishes them afterwards.
    #
    # Entries and assets are split into numbered "thread" directories
    # (<threads_dir>/0, <threads_dir>/1, ...); each directory is processed by
    # its own Thread using a fresh importer instance built from the same
    # configuration. Progress is recorded in CSV log files so that a re-run
    # skips items that were already imported or published.
    class ParallelImporter
      # Name of a per-thread working directory: digits only ("0", "7", "12").
      # Used to tell thread directories apart from siblings such as "assets".
      THREAD_DIR_NAME = /\A\d+\z/

      Encoding.default_external = 'utf-8'

      attr_reader :space, :config, :logger, :data_organizer
      attr_accessor :content_type

      # @param settings [Object] configuration object; this class reads
      #   `config` (a Hash of raw settings), `data_dir`, `assets_dir`,
      #   `collections_dir`, `threads_dir`, `log_files_dir`, `space_id`,
      #   `imported_entries`, `published_entries` and `published_assets`
      #   from it.
      def initialize(settings)
        @config = settings
        @logger = Logger.new(STDOUT)
        @data_organizer = DataOrganizer.new(@config)
        # The client instance is not stored anywhere in this class.
        # NOTE(review): presumably the contentful-management gem registers the
        # most recently built client as the shared one - confirm.
        Contentful::Management::Client.new(
          config.config['access_token'],
          default_locale: config.config['default_locale'] || 'en-US'
        )
      end

      # Finds or creates the target space and imports every content type
      # found in `config.collections_dir`.
      #
      # @param space [Hash] expects `:space_id` (existing space) or
      #   `:space_name` (space to create)
      def create_contentful_model(space)
        initialize_space(space)
        import_content_types
      end

      # Cleans the thread directories, lets the data organizer redistribute
      # the entries over `threads` directories and imports them in parallel.
      #
      # @param threads [Integer] number of worker threads / directories
      def import_data(threads)
        clean_threads_dir_before_import(threads)
        data_organizer.execute(threads)
        import_in_threads
      end

      # Logs whether the configured Management API credentials work, judged
      # by whether listing the spaces yields a Contentful::Management::Array.
      def test_credentials
        spaces = Contentful::Management::Space.all
        if spaces.is_a? Contentful::Management::Array
          logger.info 'Contentful Management API credentials: OK'
        else
          # Anything other than an Array (for instance an error object
          # returned instead of raised) is reported as invalid credentials
          # rather than silently ignored.
          logger.info 'Contentful Management API credentials: INVALID (check README)'
        end
      rescue NoMethodError => _error
        logger.info 'Contentful Management API credentials: INVALID (check README)'
      end

      # Counts the thread directories directly below `config.threads_dir`.
      # Only names made of digits are counted, so setups with ten or more
      # threads are handled and directories such as "assets" are ignored.
      #
      # @return [Integer]
      def number_of_threads
        Dir.glob("#{config.threads_dir}/*").count do |path|
          File.basename(path) =~ THREAD_DIR_NAME
        end
      end

      # Imports the entries of every thread directory, one Thread per
      # directory, and waits for all of them to finish.
      def import_in_threads
        run_in_threads(number_of_threads) do |thread_id|
          self.class.new(config).import_entries("#{config.threads_dir}/#{thread_id}", config.space_id)
        end
      end

      # Imports every `*.json` entry file in `path` that is not yet listed in
      # a `success_thread*` log. The content type id is taken from the file
      # name (`<content_type_id>_<number>.json`).
      #
      # @param path [String] thread directory holding the entry files
      # @param space_id [String] id of the target space
      def import_entries(path, space_id)
        log_file_name = "success_thread_#{File.basename(path)}"
        create_log_file(log_file_name)
        load_log_files
        # Nothing in this loop adds to config.imported_entries, so flatten once.
        already_imported = config.imported_entries.flatten
        Dir.glob("#{path}/*.json") do |entry_path|
          entry_file_name = File.basename(entry_path)
          next if already_imported.include?(entry_file_name)

          content_type_id = entry_file_name.match(/(.+)_\d+/)[1]
          import_entry(entry_path, space_id, content_type_id, log_file_name)
        end
      end

      # Imports every asset JSON file below `config.assets_dir` whose `url`
      # starts with "http" and whose id is not yet in `success_assets.csv`.
      def import_only_assets
        create_log_file('success_assets')
        # CSV.read opens the file for reading by default; no mode is passed
        # because recent csv versions accept keyword options only.
        assets_ids = Set.new(CSV.read("#{config.data_dir}/logs/success_assets.csv"))
        Dir.glob("#{config.assets_dir}/**/*json") do |file_path|
          asset_attributes = JSON.parse(File.read(file_path))
          next unless asset_url_param_start_with_http?(asset_attributes) &&
                      asset_not_imported_yet?(asset_attributes, assets_ids)

          import_asset(asset_attributes)
        end
      end

      # Creates a single asset in the configured space and records the
      # outcome. The title falls back to the asset id and the description to
      # an empty string when they are blank.
      #
      # @param asset_attributes [Hash] parsed asset JSON ('id', 'url', optional
      #   'name', 'description', 'contentType')
      def import_asset(asset_attributes)
        logger.info "Import asset - #{asset_attributes['id']} "
        asset_title = asset_attributes['name'].present? ? asset_attributes['name'] : asset_attributes['id']
        asset_description = asset_attributes['description'].present? ? asset_attributes['description'] : ''
        asset_file = create_asset_file(asset_title, asset_attributes)
        space = Contentful::Management::Space.find(config.config['space_id'])
        asset = space.assets.create(
          id: asset_attributes['id'].to_s,
          title: asset_title.to_s,
          description: asset_description,
          file: asset_file
        )
        asset_status(asset, asset_attributes)
      end

      # @param asset_attributes [Hash]
      # @return [Boolean, nil] truthy when 'url' is set and starts with "http"
      def asset_url_param_start_with_http?(asset_attributes)
        asset_attributes['url'] && asset_attributes['url'].start_with?('http')
      end

      # @param asset_attributes [Hash]
      # @param assets_ids [#to_a] already imported ids; may be nested (CSV rows)
      # @return [Boolean] true when the asset id is not among `assets_ids`
      def asset_not_imported_yet?(asset_attributes, assets_ids)
        !assets_ids.to_a.flatten.include?(asset_attributes['id'])
      end

      # Builds the file description sent along with a new asset.
      #
      # @param asset_title [String] used as the file name
      # @param params [Hash] asset attributes ('url', optional 'contentType')
      # @return [Contentful::Management::File]
      def create_asset_file(asset_title, params)
        Contentful::Management::File.new.tap do |file|
          file.properties[:contentType] = file_content_type(params)
          file.properties[:fileName] = asset_title
          file.properties[:upload] = params['url']
        end
      end

      # Triggers processing of a created asset and appends its id to
      # `success_assets.csv`; otherwise logs the error message and appends the
      # source id to `failure_assets.csv`.
      #
      # @param asset [Object] result of the create call
      # @param asset_attributes [Hash] source attributes of the asset
      def asset_status(asset, asset_attributes)
        if asset.is_a?(Contentful::Management::Asset)
          logger.info "Process asset - #{asset.id} "
          asset.process_file
          CSV.open("#{config.log_files_dir}/success_assets.csv", 'a') { |csv| csv << [asset.id] }
        else
          logger.info "Error - #{asset.message} "
          CSV.open("#{config.log_files_dir}/failure_assets.csv", 'a') { |csv| csv << [asset_attributes['id']] }
        end
      end

      # Publishes the entries of every thread directory, one Thread each.
      def publish_entries_in_threads
        run_in_threads(number_of_threads) do |thread_id|
          self.class.new(config).publish_all_entries("#{config.threads_dir}/#{thread_id}")
        end
      end

      # Splits the assets over `number_of_threads` directories below
      # `<threads_dir>/assets` and publishes them in parallel.
      #
      # @param number_of_threads [Integer]
      def publish_assets_in_threads(number_of_threads)
        clean_assets_threads_dir_before_publish(number_of_threads)
        data_organizer.split_assets_to_threads(number_of_threads)
        run_in_threads(number_of_threads) do |thread_id|
          self.class.new(config).publish_assets("#{config.threads_dir}/assets/#{thread_id}")
        end
      end

      # Publishes every asset described in `thread_dir` that is not yet
      # listed in `success_published_assets.csv`.
      #
      # @param thread_dir [String] directory holding asset JSON files
      def publish_assets(thread_dir)
        create_log_file('success_published_assets')
        config.published_assets << CSV.read("#{config.log_files_dir}/success_published_assets.csv").flatten
        already_published = config.published_assets.flatten
        Dir.glob("#{thread_dir}/*json") do |asset_file|
          asset_id = JSON.parse(File.read(asset_file))['id']
          publish_asset(asset_id) unless already_published.include?(asset_id)
        end
      end

      # @param asset_id [String] id of the asset to publish
      def publish_asset(asset_id)
        logger.info "Publish an Asset - ID: #{asset_id}"
        asset = Contentful::Management::Asset.find(config.config['space_id'], asset_id).publish
        publish_status(asset, asset_id, 'published_assets')
      end

      # Publishes every entry described in `thread_dir` that is not yet
      # listed in `success_published_entries.csv`.
      #
      # @param thread_dir [String] directory holding entry JSON files
      def publish_all_entries(thread_dir)
        create_log_file('success_published_entries')
        config.published_entries << CSV.read("#{config.log_files_dir}/success_published_entries.csv").flatten
        already_published = config.published_entries.flatten
        Dir.glob("#{thread_dir}/*json") do |entry_file|
          entry_id = JSON.parse(File.read(entry_file))['id']
          publish_entry(entry_id) unless already_published.include?(entry_id)
        end
      end

      # @param entry_id [String] id of the entry to publish
      def publish_entry(entry_id)
        logger.info "Publish entries for #{entry_id}."
        entry = Contentful::Management::Entry.find(config.config['space_id'], entry_id).publish
        publish_status(entry, entry_id, 'published_entries')
      end

      private

      # Runs the block once per thread id (0...count), each call in its own
      # Thread, and waits until every thread has finished.
      def run_in_threads(count, &block)
        workers = Array.new(count) do |thread_id|
          Thread.new(thread_id) { |id| block.call(id) }
        end
        workers.each(&:join)
      end

      # Sets @space to the existing space (`:space_id`) or to a newly created
      # one (`:space_name`).
      #
      # @raise [RuntimeError] when neither `:space_id` nor `:space_name` is given
      def initialize_space(space)
        if space[:space_id].nil? && space[:space_name].nil?
          raise 'You need to specify \'--space_id\' argument to find an existing Space or \'--space_name\' to create a new Space.'
        end

        @space = space[:space_id].present? ? Contentful::Management::Space.find(space[:space_id]) : create_space(space[:space_name])
      end

      # Creates a space in the organization named by the 'organization_id'
      # setting and returns it.
      def create_space(name_space)
        logger.info "Creating a space with name: #{name_space}"
        new_space = Contentful::Management::Space.create(name: name_space, organization_id: config.config['organization_id'])
        logger.info "Space was created successfully! Space id: #{new_space.id}"
        new_space
      end

      # Creates, fills and activates one content type per JSON file in
      # `config.collections_dir`.
      def import_content_types
        Dir.glob("#{config.collections_dir}/*json") do |file_path|
          collection_attributes = JSON.parse(File.read(file_path))
          new_content_type = create_new_content_type(space, collection_attributes)
          logger.info "Importing content_type: #{new_content_type.name}"
          create_content_type_fields(collection_attributes, new_content_type)
          new_content_type.update(displayField: collection_attributes['displayField']) if collection_attributes['displayField']
          active_status(new_content_type.activate)
        end
      end

      # @return [String] last path segment of 'id', or of 'url' when 'id' is absent
      def get_id(params)
        File.basename(params['id'] || params['url'])
      end

      # Builds the fields listed under 'fields', assigns them to the content
      # type and saves it.
      def create_content_type_fields(collection_attributes, content_type)
        content_type.fields = collection_attributes['fields'].map { |field| create_field(field) }
        content_type.save
      end

      # Creates one entry from a JSON file and records the outcome.
      def import_entry(file_path, space_id, content_type_id, log_file)
        entry_attributes = JSON.parse(File.read(file_path))
        logger.info "Creating entry: #{entry_attributes['id']}."
        entry_params = create_entry_parameters(content_type_id, entry_attributes, space_id)
        entry = content_type(content_type_id, space_id).entries.create(entry_params)
        import_status(entry, file_path, log_file)
      end

      # Converts the raw entry attributes into creation parameters: keys
      # starting with "@" are dropped, Hash and Array values are converted to
      # links/locations, and nil or empty-array results are left out.
      #
      # @return [Hash{Symbol => Object}]
      def create_entry_parameters(content_type_id, entry_attributes, space_id)
        entry_attributes.each_with_object({}) do |(attr, value), entry_params|
          next if attr.start_with?('@')

          entry_param = case value
                        when Hash then parse_attributes_from_hash(value, space_id, content_type_id)
                        when Array then parse_attributes_from_array(value, space_id, content_type_id)
                        else value
                        end
          entry_params[attr.to_sym] = entry_param unless validate_param(entry_param)
        end
      end

      # Converts a Hash value according to its 'type': 'Location' becomes a
      # location, 'File' an asset, any other type an entry reference. A Hash
      # without 'type' is returned unchanged.
      def parse_attributes_from_hash(params, space_id, content_type_id)
        type = params['type']
        return params unless type

        case type
        when 'Location' then create_location_file(params)
        when 'File' then create_asset(space_id, params)
        else create_entry(params, space_id, content_type_id)
        end
      end

      # Converts each array item: Hash items typed 'File' become assets, Hash
      # items with any other non-blank type become entry references, and all
      # other items (including plain strings) are kept as they are. Items
      # that convert to nil are dropped.
      def parse_attributes_from_array(params, space_id, content_type_id)
        params.each_with_object([]) do |attr, array_attributes|
          # Only Hash items can carry a 'type'; indexing a String with 'type'
          # would do a substring lookup and misclassify plain values.
          value = if !attr.is_a?(Hash)
                    attr
                  elsif attr['type'] == 'File'
                    create_asset(space_id, attr)
                  elsif attr['type'].present?
                    create_entry(attr, space_id, content_type_id)
                  else
                    attr
                  end
          array_attributes << value unless value.nil?
        end
      end

      # Appends the entry file name to the thread's success log, or the file
      # path and error details to `failure_thread_<n>.csv`.
      #
      # @param log_file [String] success log name, e.g. "success_thread_3"
      def import_status(entry, file_path, log_file)
        if entry.is_a? Contentful::Management::Entry
          entry_file_name = File.basename(file_path)
          logger.info 'Imported successfully!'
          CSV.open("#{config.log_files_dir}/#{log_file}.csv", 'a') { |csv| csv << [entry_file_name] }
        else
          logger.info "### Failure! - #{entry.message} - #{entry.response.raw}###"
          # \d+ keeps multi-digit thread numbers intact (thread_10, not thread_1).
          failure_filename = log_file.match(/(thread_\d+)/)[1]
          CSV.open("#{config.log_files_dir}/failure_#{failure_filename}.csv", 'a') { |csv| csv << [file_path, entry.message, entry.response.raw] }
        end
      end

      # Looks the content type up through APICache under the key
      # "content_type_<id>" and stores it in @content_type. Being defined
      # after the attr_accessor, this replaces the generated zero-argument
      # reader; the writer is untouched.
      # NOTE(review): the effect of a negative :period on APICache is not
      # visible here - confirm the intended caching behaviour.
      def content_type(content_type_id, space_id)
        @content_type = APICache.get("content_type_#{content_type_id}", period: -5) do
          Contentful::Management::ContentType.find(space_id, content_type_id)
        end
      end

      # Builds an unsaved entry object that carries only the referenced id.
      def create_entry(params, space_id, content_type_id)
        entry_id = get_id(params)
        content_type(content_type_id, space_id).entries.new.tap do |entry|
          entry.id = entry_id
        end
      end

      # Returns the asset with params['id'] from the space, or an unsaved
      # asset object with that id when the lookup does not yield an asset.
      #
      # @return [Contentful::Management::Asset, nil] nil when params has no 'id'
      def create_asset(space_id, params)
        return unless params['id']

        space = Contentful::Management::Space.find(space_id)
        found_asset = space.assets.find(params['id'])
        found_asset.is_a?(Contentful::Management::Asset) ? found_asset : initialize_asset_file(params)
      end

      # Builds an unsaved asset object carrying only the id and link type.
      def initialize_asset_file(params)
        Contentful::Management::Asset.new.tap do |asset|
          asset.id = params['id']
          asset.link_type = 'Asset'
        end
      end

      # Builds a location from the 'lat' and 'lng' keys.
      def create_location_file(params)
        Contentful::Management::Location.new.tap do |file|
          file.lat = params['lat']
          file.lon = params['lng']
        end
      end

      # Builds a content type field from its JSON description.
      def create_field(field)
        field_params = { id: field['id'], name: field['name'], required: field['required'] }
        field_params.merge!(additional_field_params(field))
        logger.info "Creating field: #{field_params[:type]}"
        create_content_type_field(field_params)
      end

      # @param field_params [Hash] :id, :name, :type, :link_type, :required, :items
      # @return [Contentful::Management::Field]
      def create_content_type_field(field_params)
        Contentful::Management::Field.new.tap do |field|
          field.id = field_params[:id]
          field.name = field_params[:name]
          field.type = field_params[:type]
          field.link_type = field_params[:link_type]
          field.required = field_params[:required]
          field.items = field_params[:items]
        end
      end

      # Logs the result of activating a content type.
      def active_status(ct_object)
        if ct_object.is_a? Contentful::Management::Error
          logger.info "### Failure! - #{ct_object.message} - #{ct_object.response.raw} ###"
        else
          logger.info 'Successfully activated!'
        end
      end

      # Logs the result of a publish call and appends it to
      # `failure_<log_file_name>.csv` or `success_<log_file_name>.csv`.
      def publish_status(ct_object, object_id, log_file_name)
        if ct_object.is_a? Contentful::Management::Error
          logger.info "### Failure! - #{ct_object.message} - #{ct_object.response.raw} ###"
          CSV.open("#{config.log_files_dir}/failure_#{log_file_name}.csv", 'a') { |csv| csv << [object_id, ct_object.message, ct_object.response.raw] }
        else
          logger.info 'Successfully activated!'
          CSV.open("#{config.log_files_dir}/success_#{log_file_name}.csv", 'a') { |csv| csv << [ct_object.id] }
        end
      end

      # Maps the JSON field type onto field parameters: 'Entry'/'Asset'
      # become links, 'Array' gets an items definition, anything else is
      # passed through as the type.
      #
      # @return [Hash]
      def additional_field_params(field)
        field_type = field['type']
        case field_type
        when 'Entry', 'Asset' then { type: 'Link', link_type: field_type }
        when 'Array' then { type: 'Array', items: create_array_field(field) }
        else { type: field_type }
        end
      end

      # Despite the name, returns true when the parameter must be SKIPPED:
      # an empty Array, or nil for any other value.
      #
      # @return [Boolean]
      def validate_param(param)
        param.is_a?(Array) ? param.empty? : param.nil?
      end

      # Builds an unsaved content type with id, name and description.
      def create_new_content_type(space, collection_attributes)
        space.content_types.new.tap do |content_type|
          content_type.id = collection_attributes['id']
          content_type.name = collection_attributes['name']
          content_type.description = collection_attributes['description']
        end
      end

      # @return [String, nil] the explicit 'contentType', or the entry of
      #   MimeContentType::EXTENSION_LIST for the extension of 'url'
      def file_content_type(params)
        params['contentType'].present? ? params['contentType'] : MimeContentType::EXTENSION_LIST[File.extname(params['url'])]
      end

      # @return [String] pretty-printed JSON representation of `item`
      def format_json(item)
        JSON.pretty_generate(JSON.parse(item.to_json))
      end

      # Builds the items definition of an Array field; the type defaults to
      # 'Link' when 'link' is not given.
      def create_array_field(params)
        Contentful::Management::Field.new.tap do |field|
          field.type = params['link'] || 'Link'
          field.link_type = params['link_type']
        end
      end

      # Removes <threads_dir>/0 .. <threads_dir>/(threads - 1) if present.
      def clean_threads_dir_before_import(threads)
        remove_thread_dirs(config.threads_dir, threads)
      end

      # Removes <threads_dir>/assets/0 .. <threads_dir>/assets/(threads - 1)
      # if present.
      def clean_assets_threads_dir_before_publish(threads)
        remove_thread_dirs("#{config.threads_dir}/assets", threads)
      end

      # Deletes the numbered directories 0...count below `base_dir`,
      # logging each removal.
      def remove_thread_dirs(base_dir, count)
        count.times do |thread|
          thread_dir = "#{base_dir}/#{thread}"
          next unless File.directory?(thread_dir)

          logger.info "Remove directory threads/#{thread} from #{base_dir} path."
          FileUtils.rm_r(thread_dir)
        end
      end

      # Creates `path` (and missing parents) unless it already exists.
      def create_directory(path)
        FileUtils.mkdir_p(path) unless File.directory?(path)
      end

      # Makes sure `<data_dir>/logs/<path>.csv` exists without altering
      # existing content (opened in append mode, nothing is written).
      def create_log_file(path)
        create_directory("#{config.data_dir}/logs")
        File.open("#{config.data_dir}/logs/#{path}.csv", 'a') { |file| file.write('') }
      end

      # Adds the contents of every `success_thread*` CSV log to
      # `config.imported_entries`, skipping lists that are already present.
      def load_log_files
        Dir.glob("#{config.log_files_dir}/*.csv") do |log_file|
          # Check the name first so unrelated logs are never parsed.
          next unless File.basename(log_file).start_with?('success_thread')

          imported_ids = CSV.read(log_file).flatten
          config.imported_entries << imported_ids unless config.imported_entries.include?(imported_ids)
        end
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
module Contentful
  module Importer
    # Version string of the contentful-importer gem. Frozen so the shared
    # constant cannot be mutated in place.
    VERSION = '0.1.0'.freeze
  end
end
@@ -1,22 +1,24 @@
1
1
  require 'spec_helper'
2
- require './lib/configuration'
2
+ require './lib/contentful/importer/configuration'
3
3
 
4
4
  module Contentful
5
- describe Configuration do
5
+ module Importer
6
+ describe Configuration do
6
7
 
7
- include_context 'shared_configuration'
8
+ include_context 'shared_configuration'
8
9
 
9
- it 'initialize' do
10
- expect(@config.data_dir).to eq 'spec/fixtures/import_files'
11
- expect(@config.collections_dir).to eq 'spec/fixtures/import_files/collections'
12
- expect(@config.assets_dir).to eq 'spec/fixtures/import_files/assets'
13
- expect(@config.entries_dir).to eq 'spec/fixtures/import_files/entries'
14
- expect(@config.log_files_dir).to eq 'spec/fixtures/import_files/logs'
15
- expect(@config.threads_dir).to eq 'spec/fixtures/import_files/threads'
16
- expect(@config.imported_entries).to be_empty
17
- expect(@config.published_entries).to be_empty
18
- expect(@config.published_assets).to be_empty
19
- expect(@config.space_id).to eq 'ip17s12q0ek4'
10
+ it 'initialize' do
11
+ expect(@config.data_dir).to eq 'spec/fixtures/import_files'
12
+ expect(@config.collections_dir).to eq 'spec/fixtures/import_files/collections'
13
+ expect(@config.assets_dir).to eq 'spec/fixtures/import_files/assets'
14
+ expect(@config.entries_dir).to eq 'spec/fixtures/import_files/entries'
15
+ expect(@config.log_files_dir).to eq 'spec/fixtures/import_files/logs'
16
+ expect(@config.threads_dir).to eq 'spec/fixtures/import_files/threads'
17
+ expect(@config.imported_entries).to be_empty
18
+ expect(@config.published_entries).to be_empty
19
+ expect(@config.published_assets).to be_empty
20
+ expect(@config.space_id).to eq 'ip17s12q0ek4'
21
+ end
20
22
  end
21
23
  end
22
24
  end