file_indexing 1.0.2 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
@@ -72,22 +72,23 @@ module FileIndexing
|
|
72
72
|
# TODO device support
|
73
73
|
def index(patterns, otherDB = nil)
|
74
74
|
abort "#{self.class}: DB not empty. Current implementation permits only one running of index" \
|
75
|
-
unless @indexed_content.
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
otherDB_table = Hash.new # contains instances from given DB while full path name is a key and instance is a value
|
81
|
-
otherDB_contents = Hash.new # given DB contents
|
75
|
+
unless @indexed_content.empty?
|
76
|
+
local_server_name = `hostname`.strip
|
77
|
+
permit_patterns = []
|
78
|
+
forbid_patterns = []
|
79
|
+
otherDB_updated = ContentData::ContentData.new
|
80
|
+
#otherDB_table = Hash.new # contains instances from given DB while full path name is a key and instance is a value
|
81
|
+
#otherDB_contents = Hash.new # given DB contents
|
82
82
|
|
83
83
|
# if there is a given DB then populate table with files
|
84
84
|
# that was already indexed on this server/device
|
85
|
-
if
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
85
|
+
if !otherDB.nil?
|
86
|
+
otherDB.each_instance { |checksum, size, content_mod_time, instance_mod_time, server, path|
|
87
|
+
if (server == local_server_name)
|
88
|
+
# add instance
|
89
|
+
otherDB_updated.add_instance(checksum, size, server, path, instance_mod_time)
|
90
|
+
end
|
91
|
+
}
|
91
92
|
end
|
92
93
|
|
93
94
|
permit_patterns = patterns.positive_patterns
|
@@ -116,23 +117,27 @@ module FileIndexing
|
|
116
117
|
files.each do |file|
|
117
118
|
file_stats = File.lstat(file)
|
118
119
|
file_mtime = IndexAgent.get_correct_mtime(file)
|
120
|
+
device = file_stats.dev.to_s
|
119
121
|
|
120
122
|
# index only files
|
121
123
|
next if file_stats.directory?
|
122
124
|
|
123
125
|
# add files present in the given DB to the DB and remove these files
|
124
126
|
# from further processing (save checksum calculation)
|
125
|
-
|
126
|
-
|
127
|
-
if
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
127
|
+
file_match = false
|
128
|
+
otherDB_updated.each_instance { |checksum, size, content_mod_time, instance_mod_time, server, path|
|
129
|
+
if otherDB_updated.instance_exists(file, local_server_name, checksum)
|
130
|
+
if size == file_stats.size and instance_mod_time == file_mtime.to_i
|
131
|
+
@indexed_content.add_instance(checksum, size, server, file, instance_mod_time)
|
132
|
+
file_match = true
|
133
|
+
break
|
134
|
+
else
|
135
|
+
Log.warning("File (#{file}) size or modification file is different. size=#{size} actual size=#{file_stats.size}" + \
|
136
|
+
" instance_mod_time=#{Time.at(instance_mod_time)} actual=#{file_mtime}")
|
137
|
+
end
|
133
138
|
end
|
134
|
-
|
135
|
-
|
139
|
+
}
|
140
|
+
next if file_match
|
136
141
|
# calculate a checksum
|
137
142
|
unless (checksum = self.class.get_checksum(file))
|
138
143
|
Log.warning("Cheksum failure: " + file)
|
@@ -140,15 +145,8 @@ module FileIndexing
|
|
140
145
|
next
|
141
146
|
end
|
142
147
|
|
143
|
-
|
144
|
-
|
145
|
-
Time.now.utc)
|
146
|
-
end
|
147
|
-
|
148
|
-
instance = ContentData::ContentInstance.new(
|
149
|
-
checksum, file_stats.size, server_name, file_stats.dev.to_s,
|
150
|
-
File.expand_path(file), file_mtime)
|
151
|
-
@indexed_content.add_instance(instance)
|
148
|
+
@indexed_content.add_instance(checksum, file_stats.size, local_server_name,
|
149
|
+
File.expand_path(file), file_mtime.to_i)
|
152
150
|
end
|
153
151
|
end
|
154
152
|
|
@@ -156,15 +154,17 @@ module FileIndexing
|
|
156
154
|
return nil unless File.exists?(filename)
|
157
155
|
file_stats = File.lstat(filename)
|
158
156
|
file_mtime = IndexAgent.get_correct_mtime(filename)
|
159
|
-
|
160
|
-
|
157
|
+
# return instance shallow representation (no server)
|
158
|
+
[file_stats.size,
|
159
|
+
"%s,%s,%s" % [`hostname`.strip , file_stats.dev.to_s , File.expand_path(filename)],
|
160
|
+
file_mtime.to_i]
|
161
161
|
end
|
162
162
|
|
163
163
|
def IndexAgent.global_path(filename)
|
164
164
|
server_name = `hostname`.strip
|
165
|
-
|
165
|
+
file_stats = File.lstat(filename)
|
166
|
+
return "%s,%s,%s" % [server_name, file_stats.dev.to_s,filename]
|
166
167
|
end
|
167
168
|
end
|
168
|
-
|
169
169
|
end
|
170
170
|
|
@@ -14,10 +14,10 @@ module FileIndexing
|
|
14
14
|
patterns.add_pattern File.join(File.dirname(__FILE__), 'index_agent_test\**\*.h'), false
|
15
15
|
|
16
16
|
indexer.index(patterns)
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
17
|
+
# ./index_agent_test/lib/libexslt.lib
|
18
|
+
Log.info "Contents: #{indexer.indexed_content}."
|
19
|
+
assert(indexer.indexed_content.content_exists('c6d9d837659e38d906a4bbdcc6703bc37e9ac7e8'))
|
20
|
+
# .index_agent_test/include/libexslt/exsltexports.h
|
21
21
|
assert_equal(false, indexer.indexed_content.content_exists('5c87a31b0106b3c4bb1768e43f5b8c41139882c2'))
|
22
22
|
# ./index_agent_test/bin/xsltproc.exe
|
23
23
|
assert(indexer.indexed_content.content_exists('d0d57ff4834a517a52004f59ee5cdb63f2f0427b'))
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: file_indexing
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0
|
4
|
+
version: 1.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-06-24 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: content_data
|