bc3 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
data/lib/bc3.rb CHANGED
@@ -34,15 +34,13 @@ Prepare a virtual folder structure as you would like to have
34
34
  and use BC3 to convert your real structure to your wishes.
35
35
 
36
36
  ==Scan for identical files.
37
- <em>
38
- The only way to find duplicate files is to flatten your folder structure so that all the files appear to be at the same level. You then can enable/show the CRC column, sort by it, then manually scan the file list for files with the same size and CRC code. I wouldn't advise it for a large number of files and folders, though. It could take a while to calculate all the CRC values, and it would be a tedious process to manually look through them all for dups.
39
- </em>
40
- http://www.scootersoftware.com/vbulletin/showpost.php?p=27736&postcount=4
37
+ See also bin/bc3_search.rb
41
38
 
42
39
  With this gem, you can analyse a snapshot, copy the data in a hash
43
40
  and search for duplicate CRCs
44
41
 
45
42
 
43
+
46
44
  =Warning and Restrictions
47
45
  Please don't
48
46
  require 'bc3'
@@ -69,8 +67,10 @@ Maximum 2GB (more is supported by BC3, but not by this gem).
69
67
  =end
70
68
  require 'date'
71
69
  require 'zlib'
70
+ require 'yaml'
72
71
 
73
72
  require 'log4r'
73
+
74
74
  $log = Log4r::Logger.new('BC3')
75
75
  $log.outputters = Log4r::StdoutOutputter.new('log_stdout')
76
76
  $log.level = Log4r::INFO
@@ -85,5 +85,5 @@ require 'bc3/time'
85
85
  require 'bc3/parse'
86
86
 
87
87
  module BC3
88
- VERSION = '0.1.0'
88
+ VERSION = '0.1.1'
89
89
  end
@@ -13,7 +13,7 @@ If you need the original File inside BC3-module, use
13
13
  KEYS = [:filename, :filesize ]
14
14
  KEYS_OPTIONAL = [
15
15
  :timestamp, :crc, :attributes,
16
- :version,
16
+ :version, :utfpath, :utfsymlink,
17
17
  ]
18
18
 
19
19
  =begin rdoc
@@ -23,7 +23,7 @@ Arguments are given in a hash.
23
23
  Must contain KEYS and supports KEYS_OPTIONAL
24
24
  =end
25
25
  def initialize( args )
26
- raise ArgumentError unless args.kind_of?(Hash)
26
+ raise ArgumentError, "No hash given (#{args.inspect})" unless args.kind_of?(Hash)
27
27
  KEYS.each{|key|
28
28
  raise ArgumentError, "Missing Key #{key}" unless args.has_key?(key)
29
29
  }
@@ -38,7 +38,10 @@ Must contain KEYS and supports KEYS_OPTIONAL
38
38
  @attributes = args[:attributes] || Attrib::Archive
39
39
 
40
40
  @version = args[:version] #-> Extended File
41
+ @utfpath = args[:utfpath] #-> Extended File
42
+ @utfsymlink = args[:utfsymlink] #-> Extended File
41
43
 
44
+ @snapshotpath = {}
42
45
  #Test content
43
46
  raise ArgumentError, "timestamp is no time-object" unless @timestamp.kind_of?(Time)
44
47
  end
@@ -52,7 +55,7 @@ The argument must contain:
52
55
  def self.new_by_filename( filename )
53
56
  $log.debug("Build file #{filename} from file system")
54
57
 
55
- #fixme: attrib
58
+ #fixme: get attributes from file system
56
59
  #~ p ::File.stat(filename)
57
60
  #~ Get attributes: http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/68298
58
61
 
@@ -69,16 +72,27 @@ The argument must contain:
69
72
  } #fixme crc if ...
70
73
  return File.new( settings )
71
74
  end
75
+ #Name of the file
72
76
  attr_reader :filename
77
+ #Name of the file
78
+ alias :basename :filename
79
+ attr_reader :crc
80
+ #Path (location) in snapshots
81
+ attr_reader :snapshotpath
73
82
 
74
83
  def to_hash()
75
- {
84
+ hash = {
76
85
  filename: @filename,
77
86
  filesize: @filesize,
78
87
  crc: @crc,
79
88
  attributes: @attributes,
80
- timestamp: @timestamp
89
+ timestamp: @timestamp,
81
90
  }
91
+ hash[:version] = @version if @version
92
+ hash[:utfpath] = @utfpath if @utfpath
93
+ hash[:utfsymlink] = @utfsymlink if @utfsymlink
94
+
95
+ hash
82
96
  end
83
97
  def inspect()
84
98
  "<BC3::File #{@filename}>"
@@ -99,7 +113,11 @@ ID_FILE (0x02)
99
113
  =end
100
114
  def bcss()
101
115
  bcss = "".force_encoding('BINARY')
102
- bcss << 2
116
+ if @version or @utfpath or @utfsymlink
117
+ bcss << 3 #with file extended_headers
118
+ else
119
+ bcss << 2
120
+ end
103
121
  bcss << @filename.size
104
122
  bcss << @filename
105
123
  bcss << fixnum2int64(@timestamp.time2ad)
@@ -113,8 +131,69 @@ ID_FILE (0x02)
113
131
 
114
132
  #CRC32
115
133
  bcss << fixnum2int32(@crc || 0)
116
- #~ bcss << 255
134
+ #extended header
135
+ bcss << bcss_extended_headers if @version or @utfpath or @utfsymlink
136
+
117
137
  bcss
118
138
  end
119
- end #Folder
139
+ =begin rdoc
140
+
141
+ ID_FILE (0x02) +
142
+ ExtraLen : UInt16
143
+ ExtraData : Byte[ExtraLen]
144
+
145
+ =====================
146
+ File Extended Headers
147
+ =====================
148
+
149
+ Like extended headers, file extended headers should be written in ascending
150
+ numeric order. Multiple headers may occur within a single ID_FILE_EX record,
151
+ and compliant parsers should break once they read a type they don't recognize.
152
+
153
+ FILE_EX_VERSION (0x01)
154
+ String representation of an executable file's Major/Minor/Maint/Build
155
+ version (e.g., "2.11.28.3542").
156
+
157
+ Length : UByte
158
+ Data : char[Length]
159
+
160
+
161
+ FILE_EX_UTF8 (0x02)
162
+ UTF-8 encoded filename. Stored as a FileExString. Only used if the UTF-8
163
+ name doesn't match the ANSI encoded one or if the filename is longer than 255
164
+ characters.
165
+
166
+
167
+ FILE_EX_LINK_PATH (0x03)
168
+ UTF-8 encoded symbolic link path. Stored as a FileExString.
169
+
170
+ =end
171
+ def bcss_extended_headers()
172
+ ehead = ''
173
+ if @version
174
+ ehead << 1
175
+ ehead << @version.size
176
+ ehead << @version
177
+ end
178
+ if @utfpath
179
+ ehead << 2
180
+ utfpath = @utfpath.dup.force_encoding('binary')
181
+ ehead << utfpath.size
182
+ ehead << utfpath
183
+ end
184
+ if @utfsymlink
185
+ ehead << 3
186
+ utfsymlink = @utfsymlink.dup.force_encoding('binary')
187
+ ehead << utfsymlink.size
188
+ ehead << utfsymlink
189
+ end
190
+ #fixme
191
+
192
+ bcss = ''
193
+ bcss << fixnum2int16(ehead.size + 2) #size including this length
194
+ bcss << ehead
195
+
196
+ bcss
197
+ end
198
+ end #File
120
199
  end #BC3
@@ -10,7 +10,7 @@ module BC3
10
10
  def initialize( dirname, timestamp = Time.now, attributes = nil )
11
11
  @dirname = dirname
12
12
  $log.debug("Create folder #{@dirname}")
13
- raise ArgumentError unless @dirname.is_a?(String)
13
+ raise ArgumentError, "Dirname no string #{@dirname.inspect}" unless @dirname.is_a?(String)
14
14
 
15
15
  @timestamp = timestamp || Time.now
16
16
  raise ArgumentError, "No Timestamp for Folder" unless @timestamp.is_a?(Time)
@@ -18,6 +18,8 @@ module BC3
18
18
  @attributes = attributes || Attrib::Directory
19
19
 
20
20
  @content = {}
21
+ @snapshotpath = {}
22
+
21
23
  end
22
24
  =begin rdoc
23
25
  Create a folder from a hash.
@@ -81,42 +83,84 @@ The argument must contain:
81
83
  end
82
84
  #Name of the folder
83
85
  attr_reader :dirname
86
+ #Name of the folder
87
+ alias :basename :dirname
88
+ #Path (location) in snapshots
89
+ attr_reader :snapshotpath
90
+ #timestamp
91
+ attr_reader :timestamp
92
+ attr_reader :attributes
93
+
84
94
  =begin rdoc
85
- Add content to Folder.
95
+ Add content to folder.
86
96
  The 'content' must support the bcss-method.
87
97
 
88
98
  Content may be:
89
- * another folder
99
+ * another folder (if paths are similar to existing folder: see #mixin)
90
100
  * a file
101
+
102
+ If another object with the same name exists, it will be overwritten.
91
103
  =end
92
104
  def << (content)
93
- if !content.respond_to?( :bcss )
105
+ #Check for BC3::File or BC3::Folder
106
+ if ! ( content.respond_to?( :bcss ) and content.respond_to?( :basename ) )
94
107
  $log.error("Add unknown datatype as content #{content.inspect}")
95
108
  raise ArgumentError, "Add unknown datatype as content #{content.inspect}"
96
109
  end
97
110
 
98
- key = content.respond_to?(:dirname) ? content.dirname : content.filename
99
- if @content[key] #content already available
100
- if @content[key].respond_to? :dirname and content.respond_to? :dirname
101
- $log.debug("Mix into folder #{key} in folder #{self.inspect}")
102
- @content[key].mixin(content)
111
+ #reset index in related snapshots.
112
+ self.snapshotpath.each{|snapshot, path| snapshot.reset_index }
113
+
114
+ if @content[content.basename] #content already available
115
+ #Target and new element are both folders?
116
+ if @content[content.basename].respond_to? :dirname and content.respond_to? :dirname
117
+ $log.debug("Mix into folder #{content.basename} in folder #{self.inspect}")
118
+ #return the folder, where it is mixed in
119
+ content = @content[content.basename].mixin(content)
103
120
  else
104
- $log.warn("Overwrite #{key} in folder #{self.inspect}")
105
- @content[key] = content
121
+ $log.info("Overwrite #{content.basename} in folder #{self.inspect}")
122
+ @content[content.basename] = content
106
123
  end
107
- else
108
- @content[key] = content
124
+ else #add new content
125
+ @content[content.basename] = content
109
126
  $log.debug("Add to folder #{self.inspect}: #{content.inspect}") if $log.debug?
110
127
  end
128
+ content #return the folder, where it is mixed in
111
129
  end
112
130
  =begin rdoc
113
131
  Merge two folders.
114
132
  The content of a folder is added to the actual content.
115
133
  =end
116
134
  def mixin( folder )
117
- $log.info("Mix in content to folder #{@dirname} in folder #{self.inspect}")
135
+ $log.debug("Mix in content to folder #{@dirname} in folder #{self.inspect}")
136
+ raise ArgumentError, "Add unknown datatype as content #{content.inspect}" unless folder.is_a?(Folder)
118
137
  folder.each{|key, content| self << content }
138
+ self #return the folder, where it is mixed in
139
+ end
140
+ =begin rdoc
141
+ Add an element with a given path.
142
+
143
+ Folders will be created for the complete path if necessary.
144
+ =end
145
+ def add_with_path(path, element)
146
+ path = path.split(%r{[\\\/]}) unless path.is_a?(Array)
147
+ if path.empty?
148
+ self << element
149
+ else #subpath
150
+ #Search target folder
151
+ target = nil
152
+ @content.each{|name, folder|
153
+ if name == path.first
154
+ target = folder
155
+ break #target found
156
+ end
157
+ }
158
+ self << target = Folder.new(path.first) unless target
159
+ path.shift
160
+ target.add_with_path( path, element )
119
161
  end
162
+ element
163
+ end
120
164
  =begin rdoc
121
165
  Loop on content of the folder and return path + object.
122
166
 
@@ -127,27 +171,26 @@ Options may be
127
171
  The result is not nested, pathes are complete
128
172
  =end
129
173
  def each(*options)
130
- if options.empty?
131
- extract = @content #take all
132
- else
133
- extract = {}
134
- @content.each{|key, content|
135
- case content
136
- when File
137
- extract[key] = content if options.include?(:files)
138
- when Folder
139
- extract[key] = content if options.include?(:folders)
140
- else
141
- raise "Internal error"
142
- end
143
-
144
- if options.include?(:recursive) and content.is_a?(Folder)
145
- content.each(*options).each{|subkey, subcontent|
146
- extract[[key, subkey].join('/')] = subcontent
147
- }
148
- end
149
- }
150
- end
174
+
175
+ options = [:folders, :files ] if options.empty? #take all, but not recursive
176
+
177
+ extract = {}
178
+ @content.each{|key, content|
179
+ case content
180
+ when File
181
+ extract[key] = content if options.include?(:files)
182
+ when Folder
183
+ extract[key+'/'] = content if options.include?(:folders)
184
+ else
185
+ raise "Internal error"
186
+ end
187
+
188
+ if options.include?(:recursive) and content.is_a?(Folder)
189
+ content.each(*options).each{|subkey, subcontent|
190
+ extract[[key, subkey].join('/')] = subcontent
191
+ }
192
+ end
193
+ }
151
194
 
152
195
  if block_given?
153
196
  extract.each{|key, content| yield key, content }
@@ -155,6 +198,7 @@ Options may be
155
198
  extract
156
199
  end
157
200
  end
201
+
158
202
  #~ def values();@content.values;end #fixme
159
203
  def inspect()
160
204
  "<BC3::Folder #{@dirname}>"
@@ -211,12 +255,14 @@ end #BC3
211
255
 
212
256
  if $0 == __FILE__
213
257
 
214
- folder = BC3::Folder.new('x')
215
- folder << BC3::Folder.new('x')
216
- folder << BC3::File.new( filename: 'x2', filesize: 1)
217
- folder << BC3::File.new( filename: 'x', filesize: 1)
258
+ folder = BC3::Folder.new('base')
259
+ #~ folder << BC3::Folder.new('dir')
260
+ #~ folder << BC3::File.new( filename: 'x2', filesize: 1)
261
+ #~ folder << BC3::File.new( filename: 'x', filesize: 1)
262
+ #~ folder.add_with_path('dir2', BC3::File.new(filename: 'test.txt', filesize: 5 ))
263
+ folder.add_with_path('dir2/subdir', BC3::File.new(filename: 'test,txt', filesize: 5 ))
218
264
 
219
- puts folder.each
265
+ puts folder.to_hash.to_yaml
220
266
  exit
221
267
 
222
268
  require 'YAML'
@@ -233,7 +279,10 @@ exit
233
279
  :filesize: 15
234
280
  }))
235
281
 
236
- x = folder.each(:folders,:files, :recursive)
237
- puts "=========="
238
- puts x.keys
239
- end
282
+ #~ x = folder.each(:folders,:files, :recursive)
283
+ #~ puts "=========="
284
+ puts folder.to_hash.to_yaml
285
+
286
+
287
+
288
+ end
@@ -97,5 +97,17 @@ Same as Helper#fixnum2int64, but as 4 bytes string.
97
97
  bindata
98
98
  end
99
99
 
100
+ =begin rdoc
101
+ Same as Helper#fixnum2int64, but as a 2-byte string.
102
+ =end
103
+ def fixnum2int16( int )
104
+ bindata = ''.force_encoding('BINARY')
105
+ ('%016b' % int).scan(/(\d{8})/).flatten.reverse.each{|b|
106
+ bindata << b.to_i(2)
107
+ }
108
+ raise ArgumentError unless bindata.size == 2 #int was too big
109
+ bindata
110
+ end
111
+
100
112
  end #Helper
101
113
  end #BC3
@@ -1,7 +1,11 @@
1
1
  =begin rdoc
2
+ Parser for BC3-Snapshots.
2
3
 
4
+ The snapshot may be compressed or uncompressed.
5
+
6
+ No support for UTF-8.
3
7
  =end
4
- $:.unshift('..')
8
+ $:.unshift('..') if $0 == __FILE__ #only test
5
9
  require 'bc3'
6
10
 
7
11
  require "zlib"
@@ -15,9 +19,24 @@ Parser for a given bcss-file.
15
19
 
16
20
  =end
17
21
  def initialize( filename )
18
- rawdata = nil
19
22
  @log = $log #fixme replace with sublogger
20
23
  @log.info("Read and parse #{filename}")
24
+ @timestamp = Time.now
25
+ case filename
26
+ when /\.ya?ml/
27
+ ::File.open(filename ){|f|
28
+ @snapshot = Snapshot.new_hash(YAML.read(f))
29
+ }
30
+ when /\.bcssx?/
31
+ read_bcss(filename)
32
+ else
33
+ raise ArgumentError, "Undefined filetype #{::File.extname(filename)}"
34
+ end
35
+ end
36
+ =begin rdoc
37
+ =end
38
+ def read_bcss(filename)
39
+ rawdata = nil
21
40
  ::File.open( filename, 'rb' ){|f|
22
41
  rawdata = f.read()
23
42
  }
@@ -43,7 +62,6 @@ Parser for a given bcss-file.
43
62
  [14..N] = Path (char[]) |
44
63
  =end
45
64
  #~ header = rawdata[0..17]
46
- @timestamp = Time.now
47
65
  @timestamp, tail = parse_filetime(rawdata[8,8])
48
66
  #Analyse flags - byte position 16/hex10
49
67
  @compressed = rawdata[16].getbyte(0) & 1 != 0
@@ -163,7 +181,7 @@ ID_FILE_EX (0x03)
163
181
  extradata, tail = parse_longstring(tail)
164
182
  extradata = parse_file_extended_headers(extradata)
165
183
  unless extradata #Skip at prob
166
- @log.warn("Skip #{filename}")
184
+ @log.warn("Skip #{filename} because of unsupported extended header")
167
185
  next
168
186
  end
169
187
  folderstack.last << File.new({
@@ -279,24 +297,27 @@ FILE_EX_LINK_PATH (0x03)
279
297
  =end
280
298
  def parse_file_extended_headers(extradata_string)
281
299
  extradata = {}
282
- case flag = extradata_string.slice!(0)
283
- when "\x01"
284
- extradata[:version] = parse_shortstring( extradata_string )
285
- when "\x02"
286
- @log.warn("Undefined extra data handling for UTF-8 encoded filename")
287
- return false #fixme
288
- when "\x03"
289
- @log.warn("Undefined extra data handling for UTF-8 encoded symbolic")
290
- return false #fixme
291
- else
292
- #fixme handling extradata_string
293
- @log.warn("Undefined extra data handling #{flag.inspect} <#{extradata_string.inspect}>")
294
- end
295
- unless extradata_string.empty?
296
- @log.warn("Undefined extra data handling <#{extradata_string.inspect}>")
297
- p extradata_string
298
- end
299
- extradata
300
+ #loop on extradata...
301
+ while !extradata_string.empty?
302
+ case flag = extradata_string.slice!(0)
303
+ when "\x01"
304
+ extradata[:version], extradata_string = parse_shortstring( extradata_string )
305
+ when "\x02" #UTF-8 encoded filename
306
+ extradata[:utfpath], extradata_string = parse_shortstring( extradata_string )
307
+ extradata[:utfpath].force_encoding('utf-8')
308
+ when "\x03" #UTF-8 encoded symbolic link path
309
+ extradata[:utfsymlink], extradata_string = parse_shortstring( extradata_string )
310
+ extradata[:utfsymlink].force_encoding('utf-8')
311
+ else
312
+ @log.error("Undefined extra data handling #{flag.inspect} <#{extradata_string.inspect}>")
313
+ extradata_string = '' #stop evaluation
314
+ end
315
+ end
316
+ #~ unless extradata_string.empty?
317
+ #~ @log.warn("Undefined extra data handling <#{extradata_string.inspect}>")
318
+ #~ p extradata_string
319
+ #~ end
320
+ extradata
300
321
  end
301
322
  end #SnapshotParser
302
323
  def to_hash; @snapshot.to_hash; end
@@ -305,8 +326,12 @@ end
305
326
  if $0 == __FILE__
306
327
  require 'yaml'
307
328
  #~ x = BC3::SnapshotParser.new('../../examples/results/testdir_2011-01-16.bcssx' )
308
- x = BC3::SnapshotParser.new('../../examples/results/bc3_2011-01-16.bcss' )
309
- #~ x = BC3::SnapshotParser.new('../../Uncompressed Sample/Uncompressed Sample.bcss' )
310
- puts x.snapshot.to_hash.to_yaml
329
+ #~ x = BC3::SnapshotParser.new('../../examples/results/bc3_2011-01-16.bcss' )
330
+ x = BC3::SnapshotParser.new('../../Uncompressed Sample/Uncompressed Sample.bcss' )
331
+ #~ puts x.snapshot.to_hash.to_yaml
311
332
  x.snapshot.save('../../Uncompressed Sample/Uncompressed Sample_reconstructed.bcss')
333
+ FileUtils.copy(
334
+ '../../Uncompressed Sample/Uncompressed Sample_reconstructed.bcss',
335
+ '../../Uncompressed Sample/Uncompressed Sample_reconstructed.xxxx'
336
+ )
312
337
  end