file_indexing 1.0.2 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -72,22 +72,23 @@ module FileIndexing
|
|
72
72
|
# TODO device support
|
73
73
|
def index(patterns, otherDB = nil)
|
74
74
|
abort "#{self.class}: DB not empty. Current implementation permits only one running of index" \
|
75
|
-
unless @indexed_content.
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
otherDB_table = Hash.new # contains instances from given DB while full path name is a key and instance is a value
|
81
|
-
otherDB_contents = Hash.new # given DB contents
|
75
|
+
unless @indexed_content.empty?
|
76
|
+
local_server_name = `hostname`.strip
|
77
|
+
permit_patterns = []
|
78
|
+
forbid_patterns = []
|
79
|
+
otherDB_updated = ContentData::ContentData.new
|
80
|
+
#otherDB_table = Hash.new # contains instances from given DB while full path name is a key and instance is a value
|
81
|
+
#otherDB_contents = Hash.new # given DB contents
|
82
82
|
|
83
83
|
# if there is a given DB then populate table with files
|
84
84
|
# that were already indexed on this server/device
|
85
|
-
if
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
85
|
+
if !otherDB.nil?
|
86
|
+
otherDB.each_instance { |checksum, size, content_mod_time, instance_mod_time, server, path|
|
87
|
+
if (server == local_server_name)
|
88
|
+
# add instance
|
89
|
+
otherDB_updated.add_instance(checksum, size, server, path, instance_mod_time)
|
90
|
+
end
|
91
|
+
}
|
91
92
|
end
|
92
93
|
|
93
94
|
permit_patterns = patterns.positive_patterns
|
@@ -116,23 +117,27 @@ module FileIndexing
|
|
116
117
|
files.each do |file|
|
117
118
|
file_stats = File.lstat(file)
|
118
119
|
file_mtime = IndexAgent.get_correct_mtime(file)
|
120
|
+
device = file_stats.dev.to_s
|
119
121
|
|
120
122
|
# index only files
|
121
123
|
next if file_stats.directory?
|
122
124
|
|
123
125
|
# add files present in the given DB to the DB and remove these files
|
124
126
|
# from further processing (this avoids recalculating their checksums)
|
125
|
-
|
126
|
-
|
127
|
-
if
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
127
|
+
file_match = false
|
128
|
+
otherDB_updated.each_instance { |checksum, size, content_mod_time, instance_mod_time, server, path|
|
129
|
+
if otherDB_updated.instance_exists(file, local_server_name, checksum)
|
130
|
+
if size == file_stats.size and instance_mod_time == file_mtime.to_i
|
131
|
+
@indexed_content.add_instance(checksum, size, server, file, instance_mod_time)
|
132
|
+
file_match = true
|
133
|
+
break
|
134
|
+
else
|
135
|
+
Log.warning("File (#{file}) size or modification file is different. size=#{size} actual size=#{file_stats.size}" + \
|
136
|
+
" instance_mod_time=#{Time.at(instance_mod_time)} actual=#{file_mtime}")
|
137
|
+
end
|
133
138
|
end
|
134
|
-
|
135
|
-
|
139
|
+
}
|
140
|
+
next if file_match
|
136
141
|
# calculate a checksum
|
137
142
|
unless (checksum = self.class.get_checksum(file))
|
138
143
|
Log.warning("Cheksum failure: " + file)
|
@@ -140,15 +145,8 @@ module FileIndexing
|
|
140
145
|
next
|
141
146
|
end
|
142
147
|
|
143
|
-
|
144
|
-
|
145
|
-
Time.now.utc)
|
146
|
-
end
|
147
|
-
|
148
|
-
instance = ContentData::ContentInstance.new(
|
149
|
-
checksum, file_stats.size, server_name, file_stats.dev.to_s,
|
150
|
-
File.expand_path(file), file_mtime)
|
151
|
-
@indexed_content.add_instance(instance)
|
148
|
+
@indexed_content.add_instance(checksum, file_stats.size, local_server_name,
|
149
|
+
File.expand_path(file), file_mtime.to_i)
|
152
150
|
end
|
153
151
|
end
|
154
152
|
|
@@ -156,15 +154,17 @@ module FileIndexing
|
|
156
154
|
return nil unless File.exists?(filename)
|
157
155
|
file_stats = File.lstat(filename)
|
158
156
|
file_mtime = IndexAgent.get_correct_mtime(filename)
|
159
|
-
|
160
|
-
|
157
|
+
# return a shallow representation of the instance: [size, "server,device,path" string, mtime converted with to_i]
|
158
|
+
[file_stats.size,
|
159
|
+
"%s,%s,%s" % [`hostname`.strip , file_stats.dev.to_s , File.expand_path(filename)],
|
160
|
+
file_mtime.to_i]
|
161
161
|
end
|
162
162
|
|
163
163
|
def IndexAgent.global_path(filename)
|
164
164
|
server_name = `hostname`.strip
|
165
|
-
|
165
|
+
file_stats = File.lstat(filename)
|
166
|
+
return "%s,%s,%s" % [server_name, file_stats.dev.to_s,filename]
|
166
167
|
end
|
167
168
|
end
|
168
|
-
|
169
169
|
end
|
170
170
|
|
@@ -14,10 +14,10 @@ module FileIndexing
|
|
14
14
|
patterns.add_pattern File.join(File.dirname(__FILE__), 'index_agent_test\**\*.h'), false
|
15
15
|
|
16
16
|
indexer.index(patterns)
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
17
|
+
# ./index_agent_test/lib/libexslt.lib
|
18
|
+
Log.info "Contents: #{indexer.indexed_content}."
|
19
|
+
assert(indexer.indexed_content.content_exists('c6d9d837659e38d906a4bbdcc6703bc37e9ac7e8'))
|
20
|
+
# ./index_agent_test/include/libexslt/exsltexports.h
|
21
21
|
assert_equal(false, indexer.indexed_content.content_exists('5c87a31b0106b3c4bb1768e43f5b8c41139882c2'))
|
22
22
|
# ./index_agent_test/bin/xsltproc.exe
|
23
23
|
assert(indexer.indexed_content.content_exists('d0d57ff4834a517a52004f59ee5cdb63f2f0427b'))
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: file_indexing
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0
|
4
|
+
version: 1.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-06-24 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: content_data
|