reso 0.1.1.0 → 0.1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5e4cf4dfcb9e6c4b1ed0a73f10bc721b710762b7
4
- data.tar.gz: c05035bf8cadb7a7044027357488f3e2ec65c709
3
+ metadata.gz: e761f8d043c586346c9cc9c319d279982e19d9f8
4
+ data.tar.gz: 4e6a3b16fc333523cc0da036e3f7cb932b07152e
5
5
  SHA512:
6
- metadata.gz: c533cabc8a83c661b24d254bde866b56ba604107512e10217efbad93712753beb045348d7058ea3a3c833c8b893a490c32a69550ff253c048c2d00b6c0db5281
7
- data.tar.gz: 9573993e629bb919d59776afc8ae4bdc75d1480cccf29c305154e4de4b57d6cbf621dbb85ba2a15a60a10970940c166fcdaf0775789d64fb952ff997fc94be03
6
+ metadata.gz: 621c9d180dd6a11a054484ef4fcecc19d45cc502c1ead96ff6a69c7e89d4023ab0ebd74f4cc33ff71f9609360c76ccb0c4cf9fdd311f238f451576ff78d7309a
7
+ data.tar.gz: d05f9c83ce99ba0c16f65720e893f1449001cf09cbd6c039ad9f5ec03826a5ed4cba3a499e871eeeb5790479a127f9b610d810893f62cda1658baaae4377c208
@@ -1,15 +1,15 @@
1
1
  class Import < ActiveRecord::Base
2
2
 
3
- belongs_to :import_format
3
+ require 'open-uri'
4
+ require 'open_uri_redirections'
4
5
 
6
+ before_save :set_import_format
7
+ belongs_to :import_format
5
8
  has_many :listings
6
9
  has_many :queued_listings
7
-
8
10
  validates_uniqueness_of :token
9
11
 
10
- before_save :set_import_format
11
-
12
- def remove_listings_no_longer_present fresh_listing_keys
12
+ def remove_listings_not_present fresh_listing_keys
13
13
  existing_listing_keys = self.listings.all.pluck(:listing_key)
14
14
  stale_listing_keys = existing_listing_keys.delete_if{|key| fresh_listing_keys.include? key }
15
15
  stale_listing_keys.each do |listing_key|
@@ -22,4 +22,15 @@ class Import < ActiveRecord::Base
22
22
  self.import_format = ImportFormat.find_by(name: 'reso') unless self.import_format.present?
23
23
  end
24
24
 
25
+ def source_url_last_modified
26
+ open(self.source_url,
27
+ http_basic_authentication: [self.source_user, self.source_pass],
28
+ allow_redirections: :all
29
+ ){|f| return f.last_modified }
30
+ end
31
+
32
+ def new_source_data_exists?
33
+ self.source_data_modified.eql? self.source_url_last_modified ? false : true
34
+ end
35
+
25
36
  end
@@ -9,6 +9,7 @@ class CreateImports < ActiveRecord::Migration
9
9
  t.string :source_url
10
10
  t.string :source_user
11
11
  t.string :source_pass
12
+ t.datetime :source_data_modified
12
13
 
13
14
  t.timestamps null: false
14
15
  end
@@ -96,36 +96,44 @@ namespace :reso do
96
96
  import = Import.find_by(token: args.import_token)
97
97
 
98
98
  unless import.blank?
99
- l, count, incoming_listing_keys, stream = 0, 0, [], ''
100
- open_tag, close_tag = get_open_and_closing_tag_for import.repeating_element
99
+ unless import.new_source_data_exists?
100
+ source_data_modified = import.source_url_last_modified
101
+ l, count, found_listing_keys, stream = 0, 0, [], ''
102
+ open_tag, close_tag = get_open_and_closing_tag_for import.repeating_element
101
103
 
102
- # Grab a file to work with
103
- filepath = download_feed_to_import import
104
- filepath = uncompress_and_return_new_filepath(filepath) if filepath.split('.').last.downcase == 'gz'
104
+ # Grab a file to work with
105
+ filepath = download_feed_to_import import
106
+ filepath = uncompress_and_return_new_filepath(filepath) if filepath.split('.').last.downcase == 'gz'
105
107
 
106
- # Grab the XML header to avoid namespace errors later
107
- xml_header = get_xml_header filepath, import.repeating_element
108
+ # Grab the XML header to avoid namespace errors later
109
+ xml_header = get_xml_header filepath, import.repeating_element
108
110
 
109
- puts (start = Time.now)
110
- puts "Starting..." if Rails.env.development?
111
- File.foreach(filepath) do |line|
112
- stream += line
113
- while (from_here = stream.index(open_tag)) && (to_there = stream.index(close_tag))
114
- xml = stream[from_here..to_there + (close_tag.length-1)]
115
- doc = Nokogiri::XML([xml_header, xml].join).remove_namespaces!
116
- incoming_listing_keys << create_queued_listing_and_return_listing_key(doc, import)
117
- stream.gsub!(xml, '')
118
- if ((l += 1) % 1000).zero?
119
- puts "#{l}\t#{l/(Time.now - start)}" if Rails.env.development?
111
+ start_time = Time.now
112
+ import_result = ImportResult.create(import: import, start_time: start_time)
113
+ File.foreach(filepath) do |line|
114
+ stream += line
115
+ while (from_here = stream.index(open_tag)) && (to_there = stream.index(close_tag))
116
+ xml = stream[from_here..to_there + (close_tag.length-1)]
117
+ doc = Nokogiri::XML([xml_header, xml].join).remove_namespaces!
118
+ found_listing_keys << create_queued_listing_and_return_listing_key(doc, import)
119
+ stream.gsub!(xml, '')
120
+ if ((l += 1) % 1000).zero?
121
+ puts "#{l}\t#{l/(Time.now - start)}" if Rails.env.development?
122
+ end
123
+ GC.start if (l % 100).zero?
120
124
  end
121
- GC.start if (l % 100).zero?
122
125
  end
126
+ end_time = Time.now
127
+ removed_listing_keys = import.remove_listings_not_present(found_listing_keys)
128
+ import_result.assign_attributes({
129
+ end_time: end_time,
130
+ found_listing_keys: found_listing_keys,
131
+ removed_listing_keys: removed_listing_keys.inspect
132
+ })
133
+ import_result.save
134
+ import.update_attribute(:source_data_modified, source_data_modified)
135
+ File.delete(filepath)
123
136
  end
124
- puts "Import speed: #{l/(Time.now - start)} listings/s" if Rails.env.development?
125
- puts "Found #{l} new listings." if Rails.env.development?
126
- stale_listing_keys = import.remove_listings_no_longer_present(incoming_listing_keys)
127
- puts "Removed #{stale_listing_keys.count} old listings." if Rails.env.development?
128
- File.delete(filepath)
129
137
  end
130
138
  end
131
139
 
@@ -1,3 +1,3 @@
1
1
  module Reso
2
- VERSION = "0.1.1.0"
2
+ VERSION = "0.1.2.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: reso
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1.0
4
+ version: 0.1.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Michael Edlund
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-08-24 00:00:00.000000000 Z
11
+ date: 2015-08-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake