file_indexing 1.0.2 → 1.1.0

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions exactly as they appear in their respective public registries.
@@ -72,22 +72,23 @@ module FileIndexing
72
72
  # TODO device support
73
73
  def index(patterns, otherDB = nil)
74
74
  abort "#{self.class}: DB not empty. Current implementation permits only one running of index" \
75
- unless @indexed_content.contents.empty?
76
-
77
- server_name = `hostname`.strip
78
- permit_patterns = Array.new
79
- forbid_patterns = Array.new
80
- otherDB_table = Hash.new # contains instances from given DB while full path name is a key and instance is a value
81
- otherDB_contents = Hash.new # given DB contents
75
+ unless @indexed_content.empty?
76
+ local_server_name = `hostname`.strip
77
+ permit_patterns = []
78
+ forbid_patterns = []
79
+ otherDB_updated = ContentData::ContentData.new
80
+ #otherDB_table = Hash.new # contains instances from given DB while full path name is a key and instance is a value
81
+ #otherDB_contents = Hash.new # given DB contents
82
82
 
83
83
  # if there is a given DB then populate table with files
84
84
  # that was already indexed on this server/device
85
- if (otherDB != nil)
86
- otherDB_contents.update(otherDB.contents)
87
- otherDB.instances.each_value do |i|
88
- next unless i.server_name == server_name #and i.device == @device
89
- otherDB_table[i.full_path] = i
90
- end
85
+ if !otherDB.nil?
86
+ otherDB.each_instance { |checksum, size, content_mod_time, instance_mod_time, server, path|
87
+ if (server == local_server_name)
88
+ # add instance
89
+ otherDB_updated.add_instance(checksum, size, server, path, instance_mod_time)
90
+ end
91
+ }
91
92
  end
92
93
 
93
94
  permit_patterns = patterns.positive_patterns
@@ -116,23 +117,27 @@ module FileIndexing
116
117
  files.each do |file|
117
118
  file_stats = File.lstat(file)
118
119
  file_mtime = IndexAgent.get_correct_mtime(file)
120
+ device = file_stats.dev.to_s
119
121
 
120
122
  # index only files
121
123
  next if file_stats.directory?
122
124
 
123
125
  # add files present in the given DB to the DB and remove these files
124
126
  # from further processing (save checksum calculation)
125
- if otherDB_table.has_key?(file)
126
- instance = otherDB_table[file]
127
- if instance.size == file_stats.size and instance.modification_time == file_mtime
128
- @indexed_content.add_content(otherDB_contents[instance.checksum])
129
- @indexed_content.add_instance(instance)
130
- next
131
- else
132
- Log.warning("File (#{file}) size or modification file is different.")
127
+ file_match = false
128
+ otherDB_updated.each_instance { |checksum, size, content_mod_time, instance_mod_time, server, path|
129
+ if otherDB_updated.instance_exists(file, local_server_name, checksum)
130
+ if size == file_stats.size and instance_mod_time == file_mtime.to_i
131
+ @indexed_content.add_instance(checksum, size, server, file, instance_mod_time)
132
+ file_match = true
133
+ break
134
+ else
135
+ Log.warning("File (#{file}) size or modification file is different. size=#{size} actual size=#{file_stats.size}" + \
136
+ " instance_mod_time=#{Time.at(instance_mod_time)} actual=#{file_mtime}")
137
+ end
133
138
  end
134
- end
135
-
139
+ }
140
+ next if file_match
136
141
  # calculate a checksum
137
142
  unless (checksum = self.class.get_checksum(file))
138
143
  Log.warning("Cheksum failure: " + file)
@@ -140,15 +145,8 @@ module FileIndexing
140
145
  next
141
146
  end
142
147
 
143
- if !@indexed_content.content_exists(checksum)
144
- @indexed_content.add_content ContentData::Content.new(checksum, file_stats.size,
145
- Time.now.utc)
146
- end
147
-
148
- instance = ContentData::ContentInstance.new(
149
- checksum, file_stats.size, server_name, file_stats.dev.to_s,
150
- File.expand_path(file), file_mtime)
151
- @indexed_content.add_instance(instance)
148
+ @indexed_content.add_instance(checksum, file_stats.size, local_server_name,
149
+ File.expand_path(file), file_mtime.to_i)
152
150
  end
153
151
  end
154
152
 
@@ -156,15 +154,17 @@ module FileIndexing
156
154
  return nil unless File.exists?(filename)
157
155
  file_stats = File.lstat(filename)
158
156
  file_mtime = IndexAgent.get_correct_mtime(filename)
159
- ContentData::ContentInstance.new(nil, file_stats.size, nil, file_stats.dev.to_s,
160
- File.expand_path(filename), file_mtime)
157
+ # return instance shallow representation (no server)
158
+ [file_stats.size,
159
+ "%s,%s,%s" % [`hostname`.strip , file_stats.dev.to_s , File.expand_path(filename)],
160
+ file_mtime.to_i]
161
161
  end
162
162
 
163
163
  def IndexAgent.global_path(filename)
164
164
  server_name = `hostname`.strip
165
- return ContentData::ContentInstance.instance_global_path(server_name, filename)
165
+ file_stats = File.lstat(filename)
166
+ return "%s,%s,%s" % [server_name, file_stats.dev.to_s,filename]
166
167
  end
167
168
  end
168
-
169
169
  end
170
170
 
@@ -1,3 +1,3 @@
1
1
  module FileIndexing
2
- VERSION = "1.0.2"
2
+ VERSION = "1.1.0"
3
3
  end
@@ -14,10 +14,10 @@ module FileIndexing
14
14
  patterns.add_pattern File.join(File.dirname(__FILE__), 'index_agent_test\**\*.h'), false
15
15
 
16
16
  indexer.index(patterns)
17
- # ./index_agent_test/lib/libexslt.lib
18
- Log.info "Contents: #{indexer.indexed_content.contents}."
19
- assert(indexer.indexed_content.content_exists('c6d9d837659e38d906a4bbdcc6703bc37e9ac7e8'))
20
- # .index_agent_test/include/libexslt/exsltexports.h
17
+ # ./index_agent_test/lib/libexslt.lib
18
+ Log.info "Contents: #{indexer.indexed_content}."
19
+ assert(indexer.indexed_content.content_exists('c6d9d837659e38d906a4bbdcc6703bc37e9ac7e8'))
20
+ # .index_agent_test/include/libexslt/exsltexports.h
21
21
  assert_equal(false, indexer.indexed_content.content_exists('5c87a31b0106b3c4bb1768e43f5b8c41139882c2'))
22
22
  # ./index_agent_test/bin/xsltproc.exe
23
23
  assert(indexer.indexed_content.content_exists('d0d57ff4834a517a52004f59ee5cdb63f2f0427b'))
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: file_indexing
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.2
4
+ version: 1.1.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-05-30 00:00:00.000000000 Z
12
+ date: 2013-06-24 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: content_data