bc3 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/bc3.rb CHANGED
@@ -34,15 +34,13 @@ Prepare a virtual folder structure as you would like to have
34
34
  and use BC3 to convert your real structure to your wishes.
35
35
 
36
36
  ==Scan for identical files.
37
- <em>
38
- The only way to find duplicate files is to flatten your folder structure so that all the files appear to be at the same level. You then can enable/show the CRC column, sort by it, then manually scan the file list for files with the same size and CRC code. I wouldn't advise it for a large number of files and folders, though. It could take a while to calculate all the CRC values, and it would be a tedious process to manually look through them all for dups.
39
- </em>
40
- http://www.scootersoftware.com/vbulletin/showpost.php?p=27736&postcount=4
37
+ See also bin/bc3_search.rb
41
38
 
42
39
  With this gem, you can analyse a snapshot, copy the data in a hash
43
40
  and search for duplicate CRCs
44
41
 
45
42
 
43
+
46
44
  =Warning and Restrictions
47
45
  Please don't
48
46
  require 'bc3'
@@ -69,8 +67,10 @@ Maximum 2GB (more is supported by BC3, but not by this gem).
69
67
  =end
70
68
  require 'date'
71
69
  require 'zlib'
70
+ require 'yaml'
72
71
 
73
72
  require 'log4r'
73
+
74
74
  $log = Log4r::Logger.new('BC3')
75
75
  $log.outputters = Log4r::StdoutOutputter.new('log_stdout')
76
76
  $log.level = Log4r::INFO
@@ -85,5 +85,5 @@ require 'bc3/time'
85
85
  require 'bc3/parse'
86
86
 
87
87
  module BC3
88
- VERSION = '0.1.0'
88
+ VERSION = '0.1.1'
89
89
  end
@@ -13,7 +13,7 @@ If you need the original File inside BC3-module, use
13
13
  KEYS = [:filename, :filesize ]
14
14
  KEYS_OPTIONAL = [
15
15
  :timestamp, :crc, :attributes,
16
- :version,
16
+ :version, :utfpath, :utfsymlink,
17
17
  ]
18
18
 
19
19
  =begin rdoc
@@ -23,7 +23,7 @@ Arguments are given in a hash.
23
23
  Must contain KEYS and supports KEYS_OPTIONAL
24
24
  =end
25
25
  def initialize( args )
26
- raise ArgumentError unless args.kind_of?(Hash)
26
+ raise ArgumentError, "No hash given (#{args.inspect})" unless args.kind_of?(Hash)
27
27
  KEYS.each{|key|
28
28
  raise ArgumentError, "Missing Key #{key}" unless args.has_key?(key)
29
29
  }
@@ -38,7 +38,10 @@ Must contain KEYS and supports KEYS_OPTIONAL
38
38
  @attributes = args[:attributes] || Attrib::Archive
39
39
 
40
40
  @version = args[:version] #-> Extended File
41
+ @utfpath = args[:utfpath] #-> Extended File
42
+ @utfsymlink = args[:utfsymlink] #-> Extended File
41
43
 
44
+ @snapshotpath = {}
42
45
  #Test content
43
46
  raise ArgumentError, "timestamp is no time-object" unless @timestamp.kind_of?(Time)
44
47
  end
@@ -52,7 +55,7 @@ The argument must contain:
52
55
  def self.new_by_filename( filename )
53
56
  $log.debug("Build file #{filename} from file system")
54
57
 
55
- #fixme: attrib
58
+ #fixme: get attributes from file system
56
59
  #~ p ::File.stat(filename)
57
60
  #~ Get attributes: http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/68298
58
61
 
@@ -69,16 +72,27 @@ The argument must contain:
69
72
  } #fixme crc if ...
70
73
  return File.new( settings )
71
74
  end
75
+ #Name of the file
72
76
  attr_reader :filename
77
+ #Name of the file
78
+ alias :basename :filename
79
+ attr_reader :crc
80
+ #Path (location) in snapshots
81
+ attr_reader :snapshotpath
73
82
 
74
83
  def to_hash()
75
- {
84
+ hash = {
76
85
  filename: @filename,
77
86
  filesize: @filesize,
78
87
  crc: @crc,
79
88
  attributes: @attributes,
80
- timestamp: @timestamp
89
+ timestamp: @timestamp,
81
90
  }
91
+ hash[:version] = @version if @version
92
+ hash[:utfpath] = @utfpath if @utfpath
93
+ hash[:utfsymlink] = @utfsymlink if @utfsymlink
94
+
95
+ hash
82
96
  end
83
97
  def inspect()
84
98
  "<BC3::File #{@filename}>"
@@ -99,7 +113,11 @@ ID_FILE (0x02)
99
113
  =end
100
114
  def bcss()
101
115
  bcss = "".force_encoding('BINARY')
102
- bcss << 2
116
+ if @version or @utfpath or @utfsymlink
117
+ bcss << 3 #with file extended_headers
118
+ else
119
+ bcss << 2
120
+ end
103
121
  bcss << @filename.size
104
122
  bcss << @filename
105
123
  bcss << fixnum2int64(@timestamp.time2ad)
@@ -113,8 +131,69 @@ ID_FILE (0x02)
113
131
 
114
132
  #CRC32
115
133
  bcss << fixnum2int32(@crc || 0)
116
- #~ bcss << 255
134
+ #extended header
135
+ bcss << bcss_extended_headers if @version or @utfpath or @utfsymlink
136
+
117
137
  bcss
118
138
  end
119
- end #Folder
139
+ =begin rdoc
140
+
141
+ ID_FILE (0x02) +
142
+ ExtraLen : UInt16
143
+ ExtraData : Byte[ExtraLen]
144
+
145
+ =====================
146
+ File Extended Headers
147
+ =====================
148
+
149
+ Like extended headers, file extended headers should be written in ascending
150
+ numeric order. Multiple headers may occur within a single ID_FILE_EX record,
151
+ and compliant parsers should break once they read a type they don't recognize.
152
+
153
+ FILE_EX_VERSION (0x01)
154
+ String representation of an executable file's Major/Minor/Maint/Build
155
+ version (e.g., "2.11.28.3542").
156
+
157
+ Length : UByte
158
+ Data : char[Length]
159
+
160
+
161
+ FILE_EX_UTF8 (0x02)
162
+ UTF-8 encoded filename. Stored as a FileExString. Only used if the UTF-8
163
+ name doesn't match the ANSI encoded one or if the filename is longer than 255
164
+ characters.
165
+
166
+
167
+ FILE_EX_LINK_PATH (0x03)
168
+ UTF-8 encoded symbolic link path. Stored as a FileExString.
169
+
170
+ =end
171
+ def bcss_extended_headers()
172
+ ehead = ''
173
+ if @version
174
+ ehead << 1
175
+ ehead << @version.size
176
+ ehead << @version
177
+ end
178
+ if @utfpath
179
+ ehead << 2
180
+ utfpath = @utfpath.dup.force_encoding('binary')
181
+ ehead << utfpath.size
182
+ ehead << utfpath
183
+ end
184
+ if @utfsymlink
185
+ ehead << 3
186
+ utfsymlink = @utfsymlink.dup.force_encoding('binary')
187
+ ehead << utfsymlink.size
188
+ ehead << utfsymlink
189
+ end
190
+ #fixme
191
+
192
+ bcss = ''
193
+ bcss << fixnum2int16(ehead.size + 2) #size including this length
194
+ bcss << ehead
195
+
196
+ bcss
197
+ end
198
+ end #File
120
199
  end #BC3
@@ -10,7 +10,7 @@ module BC3
10
10
  def initialize( dirname, timestamp = Time.now, attributes = nil )
11
11
  @dirname = dirname
12
12
  $log.debug("Create folder #{@dirname}")
13
- raise ArgumentError unless @dirname.is_a?(String)
13
+ raise ArgumentError, "Dirname no string #{@dirname.inspect}" unless @dirname.is_a?(String)
14
14
 
15
15
  @timestamp = timestamp || Time.now
16
16
  raise ArgumentError, "No Timestamp for Folder" unless @timestamp.is_a?(Time)
@@ -18,6 +18,8 @@ module BC3
18
18
  @attributes = attributes || Attrib::Directory
19
19
 
20
20
  @content = {}
21
+ @snapshotpath = {}
22
+
21
23
  end
22
24
  =begin rdoc
23
25
  Create a folder from a hash.
@@ -81,42 +83,84 @@ The argument must contain:
81
83
  end
82
84
  #Name of the folder
83
85
  attr_reader :dirname
86
+ #Name of the folder
87
+ alias :basename :dirname
88
+ #Path (location) in snapshots
89
+ attr_reader :snapshotpath
90
+ #timestamp
91
+ attr_reader :timestamp
92
+ attr_reader :attributes
93
+
84
94
  =begin rdoc
85
- Add content to Folder.
95
+ Add content to folder.
86
96
  The 'content' must support the bcss-method.
87
97
 
88
98
  Content may be:
89
- * another folder
99
+ * another folder (if paths are similar to an existing folder: see #mixin)
90
100
  * a file
101
+
102
+ If another object with similar name exists, it will be overwritten.
91
103
  =end
92
104
  def << (content)
93
- if !content.respond_to?( :bcss )
105
+ #Check for BC3::File or BC3::Folder
106
+ if ! ( content.respond_to?( :bcss ) and content.respond_to?( :basename ) )
94
107
  $log.error("Add unknown datatype as content #{content.inspect}")
95
108
  raise ArgumentError, "Add unknown datatype as content #{content.inspect}"
96
109
  end
97
110
 
98
- key = content.respond_to?(:dirname) ? content.dirname : content.filename
99
- if @content[key] #content already available
100
- if @content[key].respond_to? :dirname and content.respond_to? :dirname
101
- $log.debug("Mix into folder #{key} in folder #{self.inspect}")
102
- @content[key].mixin(content)
111
+ #reset index in related snapshot.
112
+ self.snapshotpath.each{|snapshot, path| snapshot.reset_index }
113
+
114
+ if @content[content.basename] #content already available
115
+ #Target and new element are both folders?
116
+ if @content[content.basename].respond_to? :dirname and content.respond_to? :dirname
117
+ $log.debug("Mix into folder #{content.basename} in folder #{self.inspect}")
118
+ #return the folder, where it is mixed in
119
+ content = @content[content.basename].mixin(content)
103
120
  else
104
- $log.warn("Overwrite #{key} in folder #{self.inspect}")
105
- @content[key] = content
121
+ $log.info("Overwrite #{content.basename} in folder #{self.inspect}")
122
+ @content[content.basename] = content
106
123
  end
107
- else
108
- @content[key] = content
124
+ else #add new content
125
+ @content[content.basename] = content
109
126
  $log.debug("Add to folder #{self.inspect}: #{content.inspect}") if $log.debug?
110
127
  end
128
+ content #return the folder, where it is mixed in
111
129
  end
112
130
  =begin rdoc
113
131
  Merge two folders.
114
132
  The content of a folder is added to the actual content.
115
133
  =end
116
134
  def mixin( folder )
117
- $log.info("Mix in content to folder #{@dirname} in folder #{self.inspect}")
135
+ $log.debug("Mix in content to folder #{@dirname} in folder #{self.inspect}")
136
+ raise ArgumentError, "Add unknown datatype as content #{content.inspect}" unless folder.is_a?(Folder)
118
137
  folder.each{|key, content| self << content }
138
+ self #return the folder, where it is mixed in
139
+ end
140
+ =begin rdoc
141
+ Add an element with a given path.
142
+
143
+ Folders will be created for the complete path if necessary.
144
+ =end
145
+ def add_with_path(path, element)
146
+ path = path.split(%r{[\\\/]}) unless path.is_a?(Array)
147
+ if path.empty?
148
+ self << element
149
+ else #subpath
150
+ #Search target folder
151
+ target = nil
152
+ @content.each{|name, folder|
153
+ if name == path.first
154
+ target = folder
155
+ break #target found
156
+ end
157
+ }
158
+ self << target = Folder.new(path.first) unless target
159
+ path.shift
160
+ target.add_with_path( path, element )
119
161
  end
162
+ element
163
+ end
120
164
  =begin rdoc
121
165
  Loop on content of the folder and return path + object.
122
166
 
@@ -127,27 +171,26 @@ Options may be
127
171
  The result is not nested, pathes are complete
128
172
  =end
129
173
  def each(*options)
130
- if options.empty?
131
- extract = @content #take all
132
- else
133
- extract = {}
134
- @content.each{|key, content|
135
- case content
136
- when File
137
- extract[key] = content if options.include?(:files)
138
- when Folder
139
- extract[key] = content if options.include?(:folders)
140
- else
141
- raise "Internal error"
142
- end
143
-
144
- if options.include?(:recursive) and content.is_a?(Folder)
145
- content.each(*options).each{|subkey, subcontent|
146
- extract[[key, subkey].join('/')] = subcontent
147
- }
148
- end
149
- }
150
- end
174
+
175
+ options = [:folders, :files ] if options.empty? #take all, but not recursive
176
+
177
+ extract = {}
178
+ @content.each{|key, content|
179
+ case content
180
+ when File
181
+ extract[key] = content if options.include?(:files)
182
+ when Folder
183
+ extract[key+'/'] = content if options.include?(:folders)
184
+ else
185
+ raise "Internal error"
186
+ end
187
+
188
+ if options.include?(:recursive) and content.is_a?(Folder)
189
+ content.each(*options).each{|subkey, subcontent|
190
+ extract[[key, subkey].join('/')] = subcontent
191
+ }
192
+ end
193
+ }
151
194
 
152
195
  if block_given?
153
196
  extract.each{|key, content| yield key, content }
@@ -155,6 +198,7 @@ Options may be
155
198
  extract
156
199
  end
157
200
  end
201
+
158
202
  #~ def values();@content.values;end #fixme
159
203
  def inspect()
160
204
  "<BC3::Folder #{@dirname}>"
@@ -211,12 +255,14 @@ end #BC3
211
255
 
212
256
  if $0 == __FILE__
213
257
 
214
- folder = BC3::Folder.new('x')
215
- folder << BC3::Folder.new('x')
216
- folder << BC3::File.new( filename: 'x2', filesize: 1)
217
- folder << BC3::File.new( filename: 'x', filesize: 1)
258
+ folder = BC3::Folder.new('base')
259
+ #~ folder << BC3::Folder.new('dir')
260
+ #~ folder << BC3::File.new( filename: 'x2', filesize: 1)
261
+ #~ folder << BC3::File.new( filename: 'x', filesize: 1)
262
+ #~ folder.add_with_path('dir2', BC3::File.new(filename: 'test.txt', filesize: 5 ))
263
+ folder.add_with_path('dir2/subdir', BC3::File.new(filename: 'test,txt', filesize: 5 ))
218
264
 
219
- puts folder.each
265
+ puts folder.to_hash.to_yaml
220
266
  exit
221
267
 
222
268
  require 'YAML'
@@ -233,7 +279,10 @@ exit
233
279
  :filesize: 15
234
280
  }))
235
281
 
236
- x = folder.each(:folders,:files, :recursive)
237
- puts "=========="
238
- puts x.keys
239
- end
282
+ #~ x = folder.each(:folders,:files, :recursive)
283
+ #~ puts "=========="
284
+ puts folder.to_hash.to_yaml
285
+
286
+
287
+
288
+ end
@@ -97,5 +97,17 @@ Same as Helper#fixnum2int64, but as 4 bytes string.
97
97
  bindata
98
98
  end
99
99
 
100
+ =begin rdoc
101
+ Same as Helper#fixnum2int64, but as a 2-byte string.
102
+ =end
103
+ def fixnum2int16( int )
104
+ bindata = ''.force_encoding('BINARY')
105
+ ('%016b' % int).scan(/(\d{8})/).flatten.reverse.each{|b|
106
+ bindata << b.to_i(2)
107
+ }
108
+ raise ArgumentError unless bindata.size == 2 #int was too big
109
+ bindata
110
+ end
111
+
100
112
  end #Helper
101
113
  end #BC3
@@ -1,7 +1,11 @@
1
1
  =begin rdoc
2
+ Parser for BC3-Snapshots.
2
3
 
4
+ The snapshot may be compressed or uncompressed.
5
+
6
+ No support for UTF-8.
3
7
  =end
4
- $:.unshift('..')
8
+ $:.unshift('..') if $0 == __FILE__ #only test
5
9
  require 'bc3'
6
10
 
7
11
  require "zlib"
@@ -15,9 +19,24 @@ Parser for a given bcss-file.
15
19
 
16
20
  =end
17
21
  def initialize( filename )
18
- rawdata = nil
19
22
  @log = $log #fixme replace with sublogger
20
23
  @log.info("Read and parse #{filename}")
24
+ @timestamp = Time.now
25
+ case filename
26
+ when /\.ya?ml/
27
+ ::File.open(filename ){|f|
28
+ @snapshot = Snapshot.new_hash(YAML.read(f))
29
+ }
30
+ when /\.bcssx?/
31
+ read_bcss(filename)
32
+ else
33
+ raise ArgumentError, "Undefined filetype #{::File.extname(filename)}"
34
+ end
35
+ end
36
+ =begin rdoc
37
+ =end
38
+ def read_bcss(filename)
39
+ rawdata = nil
21
40
  ::File.open( filename, 'rb' ){|f|
22
41
  rawdata = f.read()
23
42
  }
@@ -43,7 +62,6 @@ Parser for a given bcss-file.
43
62
  [14..N] = Path (char[]) |
44
63
  =end
45
64
  #~ header = rawdata[0..17]
46
- @timestamp = Time.now
47
65
  @timestamp, tail = parse_filetime(rawdata[8,8])
48
66
  #Analyse flags - byte position 16/hex10
49
67
  @compressed = rawdata[16].getbyte(0) & 1 != 0
@@ -163,7 +181,7 @@ ID_FILE_EX (0x03)
163
181
  extradata, tail = parse_longstring(tail)
164
182
  extradata = parse_file_extended_headers(extradata)
165
183
  unless extradata #Skip at prob
166
- @log.warn("Skip #{filename}")
184
+ @log.warn("Skip #{filename} because of unsupported extended header")
167
185
  next
168
186
  end
169
187
  folderstack.last << File.new({
@@ -279,24 +297,27 @@ FILE_EX_LINK_PATH (0x03)
279
297
  =end
280
298
  def parse_file_extended_headers(extradata_string)
281
299
  extradata = {}
282
- case flag = extradata_string.slice!(0)
283
- when "\x01"
284
- extradata[:version] = parse_shortstring( extradata_string )
285
- when "\x02"
286
- @log.warn("Undefined extra data handling for UTF-8 encoded filename")
287
- return false #fixme
288
- when "\x03"
289
- @log.warn("Undefined extra data handling for UTF-8 encoded symbolic")
290
- return false #fixme
291
- else
292
- #fixme handling extradata_string
293
- @log.warn("Undefined extra data handling #{flag.inspect} <#{extradata_string.inspect}>")
294
- end
295
- unless extradata_string.empty?
296
- @log.warn("Undefined extra data handling <#{extradata_string.inspect}>")
297
- p extradata_string
298
- end
299
- extradata
300
+ #loop on extradata...
301
+ while !extradata_string.empty?
302
+ case flag = extradata_string.slice!(0)
303
+ when "\x01"
304
+ extradata[:version], extradata_string = parse_shortstring( extradata_string )
305
+ when "\x02" #UTF-8 encoded filename
306
+ extradata[:utfpath], extradata_string = parse_shortstring( extradata_string )
307
+ extradata[:utfpath].force_encoding('utf-8')
308
+ when "\x03" #UTF-8 encoded symbolic link path
309
+ extradata[:utfsymlink], extradata_string = parse_shortstring( extradata_string )
310
+ extradata[:utfsymlink].force_encoding('utf-8')
311
+ else
312
+ @log.error("Undefined extra data handling #{flag.inspect} <#{extradata_string.inspect}>")
313
+ extradata_string = '' #stop evaluation
314
+ end
315
+ end
316
+ #~ unless extradata_string.empty?
317
+ #~ @log.warn("Undefined extra data handling <#{extradata_string.inspect}>")
318
+ #~ p extradata_string
319
+ #~ end
320
+ extradata
300
321
  end
301
322
  end #SnapshotParser
302
323
  def to_hash; @snapshot.to_hash; end
@@ -305,8 +326,12 @@ end
305
326
  if $0 == __FILE__
306
327
  require 'yaml'
307
328
  #~ x = BC3::SnapshotParser.new('../../examples/results/testdir_2011-01-16.bcssx' )
308
- x = BC3::SnapshotParser.new('../../examples/results/bc3_2011-01-16.bcss' )
309
- #~ x = BC3::SnapshotParser.new('../../Uncompressed Sample/Uncompressed Sample.bcss' )
310
- puts x.snapshot.to_hash.to_yaml
329
+ #~ x = BC3::SnapshotParser.new('../../examples/results/bc3_2011-01-16.bcss' )
330
+ x = BC3::SnapshotParser.new('../../Uncompressed Sample/Uncompressed Sample.bcss' )
331
+ #~ puts x.snapshot.to_hash.to_yaml
311
332
  x.snapshot.save('../../Uncompressed Sample/Uncompressed Sample_reconstructed.bcss')
333
+ FileUtils.copy(
334
+ '../../Uncompressed Sample/Uncompressed Sample_reconstructed.bcss',
335
+ '../../Uncompressed Sample/Uncompressed Sample_reconstructed.xxxx'
336
+ )
312
337
  end