file_indexing 1.0.2 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -72,22 +72,23 @@ module FileIndexing
72
72
  # TODO device support
73
73
  def index(patterns, otherDB = nil)
74
74
  abort "#{self.class}: DB not empty. Current implementation permits only one running of index" \
75
- unless @indexed_content.contents.empty?
76
-
77
- server_name = `hostname`.strip
78
- permit_patterns = Array.new
79
- forbid_patterns = Array.new
80
- otherDB_table = Hash.new # contains instances from given DB while full path name is a key and instance is a value
81
- otherDB_contents = Hash.new # given DB contents
75
+ unless @indexed_content.empty?
76
+ local_server_name = `hostname`.strip
77
+ permit_patterns = []
78
+ forbid_patterns = []
79
+ otherDB_updated = ContentData::ContentData.new
80
+ #otherDB_table = Hash.new # contains instances from given DB while full path name is a key and instance is a value
81
+ #otherDB_contents = Hash.new # given DB contents
82
82
 
83
83
  # if there is a given DB then populate table with files
84
84
  # that were already indexed on this server/device
85
- if (otherDB != nil)
86
- otherDB_contents.update(otherDB.contents)
87
- otherDB.instances.each_value do |i|
88
- next unless i.server_name == server_name #and i.device == @device
89
- otherDB_table[i.full_path] = i
90
- end
85
+ if !otherDB.nil?
86
+ otherDB.each_instance { |checksum, size, content_mod_time, instance_mod_time, server, path|
87
+ if (server == local_server_name)
88
+ # add instance
89
+ otherDB_updated.add_instance(checksum, size, server, path, instance_mod_time)
90
+ end
91
+ }
91
92
  end
92
93
 
93
94
  permit_patterns = patterns.positive_patterns
@@ -116,23 +117,27 @@ module FileIndexing
116
117
  files.each do |file|
117
118
  file_stats = File.lstat(file)
118
119
  file_mtime = IndexAgent.get_correct_mtime(file)
120
+ device = file_stats.dev.to_s
119
121
 
120
122
  # index only files
121
123
  next if file_stats.directory?
122
124
 
123
125
  # add files present in the given DB to the DB and remove these files
124
126
  # from further processing (save checksum calculation)
125
- if otherDB_table.has_key?(file)
126
- instance = otherDB_table[file]
127
- if instance.size == file_stats.size and instance.modification_time == file_mtime
128
- @indexed_content.add_content(otherDB_contents[instance.checksum])
129
- @indexed_content.add_instance(instance)
130
- next
131
- else
132
- Log.warning("File (#{file}) size or modification file is different.")
127
+ file_match = false
128
+ otherDB_updated.each_instance { |checksum, size, content_mod_time, instance_mod_time, server, path|
129
+ if otherDB_updated.instance_exists(file, local_server_name, checksum)
130
+ if size == file_stats.size and instance_mod_time == file_mtime.to_i
131
+ @indexed_content.add_instance(checksum, size, server, file, instance_mod_time)
132
+ file_match = true
133
+ break
134
+ else
135
+ Log.warning("File (#{file}) size or modification file is different. size=#{size} actual size=#{file_stats.size}" + \
136
+ " instance_mod_time=#{Time.at(instance_mod_time)} actual=#{file_mtime}")
137
+ end
133
138
  end
134
- end
135
-
139
+ }
140
+ next if file_match
136
141
  # calculate a checksum
137
142
  unless (checksum = self.class.get_checksum(file))
138
143
  Log.warning("Cheksum failure: " + file)
@@ -140,15 +145,8 @@ module FileIndexing
140
145
  next
141
146
  end
142
147
 
143
- if !@indexed_content.content_exists(checksum)
144
- @indexed_content.add_content ContentData::Content.new(checksum, file_stats.size,
145
- Time.now.utc)
146
- end
147
-
148
- instance = ContentData::ContentInstance.new(
149
- checksum, file_stats.size, server_name, file_stats.dev.to_s,
150
- File.expand_path(file), file_mtime)
151
- @indexed_content.add_instance(instance)
148
+ @indexed_content.add_instance(checksum, file_stats.size, local_server_name,
149
+ File.expand_path(file), file_mtime.to_i)
152
150
  end
153
151
  end
154
152
 
@@ -156,15 +154,17 @@ module FileIndexing
156
154
  return nil unless File.exists?(filename)
157
155
  file_stats = File.lstat(filename)
158
156
  file_mtime = IndexAgent.get_correct_mtime(filename)
159
- ContentData::ContentInstance.new(nil, file_stats.size, nil, file_stats.dev.to_s,
160
- File.expand_path(filename), file_mtime)
157
+ # return a shallow representation of the instance (no checksum): [size, "server,device,path", integer mtime]
158
+ [file_stats.size,
159
+ "%s,%s,%s" % [`hostname`.strip , file_stats.dev.to_s , File.expand_path(filename)],
160
+ file_mtime.to_i]
161
161
  end
162
162
 
163
163
  def IndexAgent.global_path(filename)
164
164
  server_name = `hostname`.strip
165
- return ContentData::ContentInstance.instance_global_path(server_name, filename)
165
+ file_stats = File.lstat(filename)
166
+ return "%s,%s,%s" % [server_name, file_stats.dev.to_s,filename]
166
167
  end
167
168
  end
168
-
169
169
  end
170
170
 
@@ -1,3 +1,3 @@
1
1
  module FileIndexing
2
- VERSION = "1.0.2"
2
+ VERSION = "1.1.0"
3
3
  end
@@ -14,10 +14,10 @@ module FileIndexing
14
14
  patterns.add_pattern File.join(File.dirname(__FILE__), 'index_agent_test\**\*.h'), false
15
15
 
16
16
  indexer.index(patterns)
17
- # ./index_agent_test/lib/libexslt.lib
18
- Log.info "Contents: #{indexer.indexed_content.contents}."
19
- assert(indexer.indexed_content.content_exists('c6d9d837659e38d906a4bbdcc6703bc37e9ac7e8'))
20
- # .index_agent_test/include/libexslt/exsltexports.h
17
+ # ./index_agent_test/lib/libexslt.lib
18
+ Log.info "Contents: #{indexer.indexed_content}."
19
+ assert(indexer.indexed_content.content_exists('c6d9d837659e38d906a4bbdcc6703bc37e9ac7e8'))
20
+ # ./index_agent_test/include/libexslt/exsltexports.h
21
21
  assert_equal(false, indexer.indexed_content.content_exists('5c87a31b0106b3c4bb1768e43f5b8c41139882c2'))
22
22
  # ./index_agent_test/bin/xsltproc.exe
23
23
  assert(indexer.indexed_content.content_exists('d0d57ff4834a517a52004f59ee5cdb63f2f0427b'))
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: file_indexing
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.2
4
+ version: 1.1.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-05-30 00:00:00.000000000 Z
12
+ date: 2013-06-24 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: content_data