fsinv 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,59 @@
1
+
2
+ require 'fsinv'
3
+
4
+ module Fsinv
5
+
6
# Description of one directory in an inventory.
#
# Adds directory-specific counters and the list of child descriptions on top
# of whatever Fsinv::BaseDescription provides.
# NOTE(review): BaseDescription is defined in another file; this class assumes
# it supplies path/bytes/ctime/mtime/osx_tags/fshugo_tags accessors and a
# to_hash method -- confirm against that file.
class DirectoryDescription < Fsinv::BaseDescription

  include Fsinv

  attr_accessor :file_count, :item_count, :file_list

  # path         - path of the directory being described
  # reduced_scan - forwarded unchanged to the superclass constructor
  def initialize(path, reduced_scan = false)
    super(path, reduced_scan)

    @file_list = []
    @file_count = 0
    @item_count = 0
  end # initialize

  # Returns a Hash holding "type" => "directory", everything the superclass
  # reports, and the directory-specific counters and child list.
  def to_hash
    h = { "type" => "directory" }
    h = h.merge(super.to_hash)
    h["file_count"] = @file_count
    h["item_count"] = @item_count
    h["file_list"] = @file_list
    return h
  end # to_hash

  def as_json(options = { })
    return to_hash
  end

  def to_json(*a)
    return as_json.to_json(*a)
  end

  # Marshal hook: dumps the hash representation without the "type" marker.
  #
  # This must start from to_hash, not to_json: on a JSON String,
  # delete("type") is String#delete and would strip every t, y, p and e
  # character instead of removing a key.
  def marshal_dump
    h = to_hash
    h.delete("type")
    return h
  end

  # Marshal hook: restores state from the Hash produced by marshal_dump.
  # Optional attributes are only assigned when the dump carries a value.
  def marshal_load(data)
    self.path = data['path']
    self.bytes = data['bytes']
    self.ctime = data['ctime'] unless data['ctime'].nil?
    self.mtime = data['mtime'] unless data['mtime'].nil?
    self.file_count = data['file_count'] unless data['file_count'].nil?
    self.item_count = data['item_count'] unless data['item_count'].nil?
    self.osx_tags = data['osx_tags'] unless data['osx_tags'].nil?
    self.fshugo_tags = data['fshugo_tags'] unless data['fshugo_tags'].nil?
    self.file_list = data['file_list']
  end

end # DirectoryDescription
58
+
59
+ end
@@ -0,0 +1,130 @@
1
+
2
+ require 'fsinv'
3
+
4
+ module Fsinv
5
+
6
# Description of one regular file in an inventory.
#
# On top of what Fsinv::BaseDescription provides, it records the file size
# and, unless a reduced scan was requested, lookup-table ids for the MIME type
# and magic description plus optional CRC32 / MD5 checksums.
# NOTE(review): BaseDescription is defined in another file; this class assumes
# it supplies path/bytes/ctime/mtime/osx_tags/fshugo_tags accessors and a
# to_hash method -- confirm against that file.
class FileDescription < Fsinv::BaseDescription

  include Fsinv

  attr_accessor :mimetype, :magicdescr, :crc32, :md5

  # path         - path of the file being described
  # reduced_scan - when true only the size is recorded (used for pseudofiles,
  #                .git folders, etc.)
  def initialize(path, reduced_scan = false)
    super(path, reduced_scan)

    begin
      @bytes = File.size(@path)
    rescue
      puts "error: exception getting size for file #{path}" if Fsinv.options[:verbose]
      @bytes = 0
    end

    unless reduced_scan # don't do this if we only want to know file sizes
      @mimetype = get_mime_id
      @magicdescr = get_magic_descr_ids
      @crc32 = calc_crc32
      @md5 = calc_md5
    end
  end # initialize

  # Returns a Hash holding "type" => "file", everything the superclass
  # reports, and every file-specific attribute that is not nil.
  def to_hash
    h = { "type" => "file" }
    h = h.merge(super.to_hash)
    h["mimetype"] = @mimetype unless @mimetype.nil?
    h["magicdescr"] = @magicdescr unless @magicdescr.nil?
    h["crc32"] = @crc32 unless @crc32.nil?
    h["md5"] = @md5 unless @md5.nil?
    return h
  end # to_hash

  def as_json(options = { })
    return to_hash
  end

  def to_json(*a)
    return as_json.to_json(*a)
  end

  # Marshal hook: dumps the hash representation without the "type" marker.
  #
  # This must start from to_hash, not to_json: on a JSON String,
  # delete("type") is String#delete and would strip every t, y, p and e
  # character instead of removing a key.
  def marshal_dump
    h = to_hash
    h.delete("type")
    return h
  end

  # Marshal hook: restores state from the Hash produced by marshal_dump.
  # Optional attributes are only assigned when the dump carries a value.
  def marshal_load(data)
    self.path = data['path']
    self.bytes = data['bytes']
    self.ctime = data['ctime'] unless data['ctime'].nil?
    self.mtime = data['mtime'] unless data['mtime'].nil?
    self.mimetype = data['mimetype'] unless data['mimetype'].nil?
    self.magicdescr = data['magicdescr'] unless data['magicdescr'].nil?
    self.crc32 = data['crc32'] unless data['crc32'].nil?
    self.md5 = data['md5'] unless data['md5'].nil?
    self.osx_tags = data['osx_tags'] unless data['osx_tags'].nil?
    self.fshugo_tags = data['fshugo_tags'] unless data['fshugo_tags'].nil?
  end

  private

  # Determines the MIME type of the file, registers it in Fsinv.mime_tab and
  # returns the id the table reports for it.
  def get_mime_id
    mime_type = ""
    if /darwin/.match(RUBY_PLATFORM) # == osx
      # Argument-array form: the path is never handed to a shell, so quotes,
      # backticks or "$(...)" in a file name cannot inject commands. "--"
      # keeps a path starting with "-" from being parsed as an option.
      mime_type = IO.popen(['file', '--mime', '-b', '--', @path], &:read).gsub("\n", "")
    else
      begin
        mime_type = MIME::Types.type_for(@path).join(', ')
      rescue ArgumentError # if this happens you should definitely repair some file names
        puts "error: mime type unavailable" unless Fsinv.options[:silent]
      end
    end

    Fsinv.mime_tab.add(mime_type) unless Fsinv.mime_tab.contains?(mime_type)
    return Fsinv.mime_tab.get_id(mime_type)
  end

  # Determines the magic description of the file, registers it in
  # Fsinv.magic_tab and returns the id the table reports for it.
  def get_magic_descr_ids
    magic_descr = ""
    if /darwin/.match(RUBY_PLATFORM) # == osx
      # See get_mime_id: no shell is involved, so the path cannot inject.
      magic_descr = IO.popen(['file', '-b', '--', @path], &:read)
    else # any other system
      begin
        # Kept raw here; it is sanitized exactly once below. Sanitizing in
        # both places escaped the quotes twice and left a stray backslash
        # in front of the final "'" replacement.
        magic_descr = Fsinv.fmagic.file(@path)
      rescue
        puts "error: file magic file information unavailable" unless Fsinv.options[:silent]
      end
    end

    magic_descr = Fsinv.sanitize_string(magic_descr).gsub("\n","").gsub("\\\"", "'")

    Fsinv.magic_tab.add(magic_descr) unless Fsinv.magic_tab.contains?(magic_descr)
    return Fsinv.magic_tab.get_id(magic_descr)
  end

  # Returns the CRC32 hex digest of the file when Fsinv.options[:crc32] is
  # set, otherwise nil. Any StandardError during the calculation yields nil.
  # NOTE(review): Digest::CRC32 is not required anywhere in the visible
  # sources -- presumably the caller loads it; verify.
  def calc_crc32
    return nil unless Fsinv.options[:crc32]

    begin
      return Digest::CRC32.file(@path).hexdigest
    rescue
      puts "error calculating crc32 for #{path}" if Fsinv.options[:verbose]
      return nil
    end
  end

  # Returns the MD5 hex digest of the file when Fsinv.options[:md5] is set,
  # otherwise nil. Any StandardError during the calculation yields nil.
  # NOTE(review): no `require 'digest'` is visible in these sources --
  # presumably the caller loads it; verify.
  def calc_md5
    return nil unless Fsinv.options[:md5]

    begin
      return Digest::MD5.file(@path).hexdigest
    rescue
      puts "error calculating md5 for #{path}" if Fsinv.options[:verbose]
      return nil
    end
  end

end # FileDescription
130
+ end
@@ -0,0 +1,72 @@
1
+
2
+ require 'fsinv'
3
+
4
module Fsinv

  # A complete inventory: the scanned file structure, the time the inventory
  # object was created and the four lookup tables referenced by the
  # descriptions.
  class Inventory

    include Fsinv

    attr_accessor :file_structure, :timestamp, :magic_tab, :mime_tab, :osx_tab, :fshugo_tab

    # Names of the attributes handled by marshal_load, in assignment order.
    MARSHAL_KEYS = %w[file_structure timestamp magic_tab mime_tab osx_tab fshugo_tab].freeze

    # file_structure - enumerable of description objects; each element must
    #                  respond to #bytes and #item_count for #size and
    #                  #item_count to work.
    # The lookup tables are taken from the module-level Fsinv accessors.
    def initialize(file_structure)
      @file_structure = file_structure
      @timestamp = Time.now
      @magic_tab = Fsinv.magic_tab
      @mime_tab = Fsinv.mime_tab
      @osx_tab = Fsinv.osx_tab
      @fshugo_tab = Fsinv.fshugo_tab
    end

    # Sum of #bytes over all entries of the file structure.
    def size
      file_structure.inject(0) { |total, fs| total + fs.bytes }
    end

    # Sum of #item_count over all entries of the file structure.
    def item_count
      file_structure.inject(0) { |total, fs| total + fs.item_count }
    end

    # Returns a Hash with the timestamp, the file structure and every lookup
    # table that is present and non-empty. Tables may be nil after a Marshal
    # round trip (empty tables are not dumped), so nil is treated like empty.
    def to_hash
      h = {
        "timestamp"      => @timestamp,
        "file_structure" => @file_structure,
      }
      tables = [
        ["mime_tab", @mime_tab],
        ["magic_tab", @magic_tab],
        ["osx_tab", @osx_tab],
        ["fshugo_tab", @fshugo_tab]
      ]
      tables.each do |key, table|
        h[key] = table unless table.nil? || table.empty?
      end
      return h
    end

    def as_json(options = { })
      return to_hash
    end

    def to_json(*a)
      as_json.to_json(*a)
    end

    # Marshal hook: the dumped state is the hash representation.
    def marshal_dump
      return to_hash
    end

    # Marshal hook: restores every attribute for which the dumped Hash
    # carries a non-nil value; attributes missing from the dump stay nil.
    def marshal_load(data)
      MARSHAL_KEYS.each do |key|
        value = data[key]
        public_send("#{key}=", value) unless value.nil?
      end
    end

  end

end
@@ -0,0 +1,77 @@
1
+ module Fsinv
2
+
3
# Bidirectional id <-> value table.
#
# Values are stored in val_map keyed by an integer id taken from idcursor,
# which is incremented before each insertion (so the first id is 1). Empty
# strings and nil are never stored.
class LookupTable

  attr_accessor :val_map, :idcursor

  def initialize
    @val_map = {}
    @idcursor = 0
  end

  # True when value is stored in the table; the empty string never is.
  def contains?(value)
    return false if value == ""

    @val_map.has_value?(value)
  end

  # Stores value and returns its id. An already stored value returns its
  # existing id; "" and nil are rejected with nil.
  def add(value)
    return get_id(value) if contains?(value)
    return nil if value == "" || value.nil?

    @idcursor += 1
    @val_map[@idcursor] = value
    @idcursor
  end

  def empty?
    @val_map.empty?
  end

  # Returns the id stored for value, or nil when it is not in the table.
  def get_id(value)
    contains?(value) ? @val_map.key(value) : nil
  end

  # Returns the value stored under id, or nil for an unknown id.
  # (The previous implementation referenced an undefined local `value` and
  # raised NameError on every call.)
  def get_value(id)
    @val_map[id]
  end

  # Returns the table as an Array of {"id" => id, "value" => value} hashes,
  # in val_map iteration order.
  def to_a
    @val_map.map { |id, val| { "id" => id, "value" => val } }
  end

  # Adds the "value" of every entry in json that is not yet stored. The
  # entries' "id" fields are not read; ids are assigned afresh by #add.
  def from_json(json)
    json.each do |entry|
      add(entry["value"]) unless contains?(entry["value"])
    end
  end

  def as_json(options = { })
    to_a
  end

  def to_json(*a)
    as_json.to_json(*a)
  end

  # Marshal hook: dumps the map and the id cursor.
  def marshal_dump
    {
      'val_map'  => val_map,
      'idcursor' => idcursor
    }
  end

  # Marshal hook: restores the map and the id cursor from marshal_dump's Hash.
  def marshal_load(data)
    self.val_map = data['val_map']
    self.idcursor = data['idcursor']
  end

end # LookupTable
76
+
77
+ end
data/lib/fsinv.rb ADDED
@@ -0,0 +1,306 @@
1
+
2
+
3
+ # author: Maximilian Irro <max@disposia.org>, 2014
4
+
5
+ require 'pathname'
6
+
7
+ require 'fsinv/basedescription'
8
+ require 'fsinv/directorydescription'
9
+ require 'fsinv/filedescription'
10
+ require 'fsinv/inventory'
11
+ require 'fsinv/lookuptable'
12
+
13
+ begin
14
+ require 'mime/types'
15
+ rescue LoadError
16
+ puts "gem 'mime' required. Install it using 'gem install mime-types'"
17
+ exit
18
+ end
19
+
20
+ unless /darwin/.match(RUBY_PLATFORM) # == osx
21
+ begin
22
+ require 'filemagic'
23
+ rescue LoadError
24
+ puts "gem 'filemagic' required. Install it using 'gem install ruby-filemagic'"
25
+ puts "If you have trouble on OSX you may need to run 'brew install libmagic' before"
26
+ exit
27
+ end
28
+ end
29
+
30
+
31
+
32
+ module Fsinv
33
+
34
+ VERSION = '0.1.2'
35
+
36
+ # Kibibyte, Mebibyte, Gibibyte, etc... all the IEC sizes
37
+ BYTES_IN_KiB = 2**10
38
+ BYTES_IN_MiB = 2**20
39
+ BYTES_IN_GiB = 2**30
40
+ BYTES_IN_TiB = 2**40
41
+
42
+   # these define a KB as 1000 bytes, according to the SI prefix
43
+ BYTES_IN_KB = 10**3
44
+ BYTES_IN_MB = 10**6
45
+ BYTES_IN_GB = 10**9
46
+ BYTES_IN_TB = 10**12
47
+
48
+ IGNORE_FILES = ['.AppleDouble','.Parent','.DS_Store','Thumbs.db','__MACOSX','.wine']
49
+
50
+ # calculate the sizes of these folders, yet do not write their content into the
51
+ # inventory index. these appear as files on osx (.app, .bundle)
52
+ PSEUDO_FILES = [
53
+ '.app',
54
+ '.bundle',
55
+ '.mbox', # osx mailbox exports
56
+ '.plugin',
57
+ '.sparsebundle',
58
+ '.abbu', # osx contact archive exports
59
+ '.mode' # SubEthaEdit and Coda modes
60
+ ]
61
+
62
+ class << self
63
+ attr_accessor :options, :fmagic, :mime_tab, :magic_tab, :osx_tab, :fshugo_tab
64
+ end
65
+
66
+ Fsinv.options = {}
67
+ Fsinv.fmagic = FileMagic.new unless /darwin/.match(RUBY_PLATFORM)
68
+ Fsinv.magic_tab = Fsinv::LookupTable.new
69
+ Fsinv.mime_tab = Fsinv::LookupTable.new
70
+ Fsinv.osx_tab = Fsinv::LookupTable.new
71
+ Fsinv.fshugo_tab = Fsinv::LookupTable.new
72
+
73
+ begin
74
+ require 'ffi-xattr'
75
+ Fsinv.options[:xattr] = true
76
+ rescue LoadError
77
+ puts "gem 'ffi-xattr' required. Install it using 'gem install ffi-xattr'"
78
+ Fsinv.options[:xattr] = false
79
+ #exit
80
+ end
81
+
82
+ module_function # all following methods will be callable from outside the module
83
+
84
+ # tries to handle various encoding problems encounterd with path strings
85
# Tries to handle various encoding problems encountered with path strings.
#
# Steps:
#   1. Round-trips the string through UTF-16BE so that invalid or
#      unconvertible bytes become "?" and the result is UTF-8.
#   2. Reinterprets C1 control characters (U+0080..U+009F) as the
#      windows-1252 byte of the same value. The five bytes windows-1252
#      leaves undefined (0x81, 0x8D, 0x8F, 0x90, 0x9D) become "?" instead of
#      raising Encoding::UndefinedConversionError.
#   3. Escapes every double quote with a backslash.
#
# Returns a new String; the argument is not modified.
def sanitize_string(string)
  utf8 = string.encode("UTF-16BE", :invalid => :replace, :undef => :replace, :replace => "?")
               .encode("UTF-8")

  repaired = utf8.gsub(/[\u0080-\u009F]/) do |c1|
    # In UTF-8 these characters are two bytes; the second byte equals the
    # code point, which is the windows-1252 byte we want to reinterpret.
    c1.getbyte(1).chr.force_encoding('windows-1252')
      .encode('utf-8', :invalid => :replace, :undef => :replace, :replace => "?")
  end

  repaired.gsub(/\"/, "\\\"") # escape double quotes in string
end
91
+
92
# Formats a byte count with SI (power-of-1000) units.
#
# bytes - numeric byte count
#
# Returns a String with one decimal place and the largest unit whose size
# does not exceed bytes ("TB", "GB", "MB", "KB"), or "<bytes> B" below one
# KB. Boundaries are inclusive, so exactly 1000 bytes is "1.0 KB".
def pretty_SI_bytes(bytes)
  return "%.1f TB" % (bytes.to_f / BYTES_IN_TB) if bytes >= BYTES_IN_TB
  return "%.1f GB" % (bytes.to_f / BYTES_IN_GB) if bytes >= BYTES_IN_GB
  return "%.1f MB" % (bytes.to_f / BYTES_IN_MB) if bytes >= BYTES_IN_MB
  return "%.1f KB" % (bytes.to_f / BYTES_IN_KB) if bytes >= BYTES_IN_KB

  "#{bytes} B"
end
99
+
100
# Formats a byte count with IEC (power-of-1024) units.
#
# bytes - numeric byte count
#
# Returns a String with one decimal place and the largest unit whose size
# does not exceed bytes ("TiB", "GiB", "MiB", "KiB"), or "<bytes> B" below
# one KiB. Boundaries are inclusive, so exactly 1024 bytes is "1.0 KiB".
def pretty_IEC_bytes(bytes)
  return "%.1f TiB" % (bytes.to_f / BYTES_IN_TiB) if bytes >= BYTES_IN_TiB
  return "%.1f GiB" % (bytes.to_f / BYTES_IN_GiB) if bytes >= BYTES_IN_GiB
  return "%.1f MiB" % (bytes.to_f / BYTES_IN_MiB) if bytes >= BYTES_IN_MiB
  return "%.1f KiB" % (bytes.to_f / BYTES_IN_KiB) if bytes >= BYTES_IN_KiB

  "#{bytes} B"
end
107
+
108
# Recursively scans folder_path and returns a Fsinv::DirectoryDescription.
#
# folder_path  - path of the directory to scan
# reduced_scan - when true, only sizes are accumulated: children are not
#                added to file_list and not counted below this directory.
#                It is switched on automatically for PSEUDO_FILES extensions
#                and for dot-directories, and is inherited by every
#                sub-directory scanned from here.
#
# Children whose basename is listed in IGNORE_FILES are skipped. Errors
# raised while listing the directory are not rescued here.
# NOTE(review): file_count is never incremented anywhere in this method, so
# it stays at the value DirectoryDescription#initialize sets -- confirm
# whether that is intended.
def parse(folder_path, reduced_scan = false)
  base_name = File.basename(folder_path)
  quiet = reduced_scan || Fsinv.options[:silent]

  if IGNORE_FILES.include?(base_name)
    # do nothing
  elsif base_name[0..1] == "._"
    # these are some osx files no one cares about -> ignore
  elsif PSEUDO_FILES.include?(File.extname(folder_path)) # stuff like .app, .bundle, .mbox etc.
    puts "processing reduced_scan #{folder_path}" unless quiet
    reduced_scan = true
  elsif base_name[0] == "."
    puts "processing dotfile #{folder_path}" unless quiet
    reduced_scan = true
  else
    puts "processing #{folder_path}/*" unless quiet
  end

  curr_dir = Fsinv::DirectoryDescription.new(folder_path, reduced_scan)

  Pathname.new(folder_path).children.each do |child|
    file = child.to_s.encode("UTF-8")
    next if IGNORE_FILES.include?(File.basename(file))

    if File.directory?(file)
      sub_folder = parse(file, reduced_scan)
      curr_dir.bytes += sub_folder.bytes
      curr_dir.file_list << sub_folder unless reduced_scan
      curr_dir.item_count += 1 # count this directory as an item
      curr_dir.item_count += sub_folder.item_count unless reduced_scan
    else
      # Same truthiness test on :silent as every other message above; the
      # former `.nil?` check still suppressed output when :silent was false.
      puts "processing #{file}" if Fsinv.options[:verbose] && !reduced_scan && !Fsinv.options[:silent]
      sub_file = Fsinv::FileDescription.new(file, reduced_scan)
      curr_dir.bytes += sub_file.bytes
      curr_dir.file_list << sub_file unless reduced_scan
      curr_dir.item_count += 1 unless reduced_scan
    end
  end

  return curr_dir
end # parse
152
+
153
+
154
# Persists structitem, and for directories every entry of its file_list
# recursively, through FileStructure.create.
#
# Lookup-table ids stored on the description are first translated back to
# their values via the Fsinv tables and then resolved to database ids with
# the MimeType, MagicDescription, OsxTag and FshugoTag models.
# NOTE(review): those model classes are not defined in the visible sources --
# presumably the caller sets them up; verify.
def filestructure_to_db(structitem)
  record = {
    :path  => structitem.path,
    :bytes => structitem.bytes,
    :ctime => structitem.ctime,
    :mtime => structitem.mtime
  }

  case structitem
  when DirectoryDescription
    record[:entity_type] = "directory"
    record[:file_count] = structitem.file_count
    record[:item_count] = structitem.item_count
  when FileDescription
    record[:entity_type] = "file"

    mime_value = Fsinv.mime_tab.get_value(structitem.mimetype)
    record[:mimetype] = MimeType.where(:mimetype => mime_value).ids.first

    magic_value = Fsinv.magic_tab.get_value(structitem.magicdescr)
    record[:magicdescr] = MagicDescription.where(:magicdescr => magic_value).ids.first
  end

  # arrays of db ids; empty when the description carries no tags
  osx_ids = []
  unless structitem.osx_tags.nil?
    structitem.osx_tags.each do |table_id|
      osx_ids << OsxTag.where(:tag => Fsinv.osx_tab.get_value(table_id)).ids.first
    end
  end
  record[:osx_tags] = osx_ids

  fshugo_ids = []
  unless structitem.fshugo_tags.nil?
    structitem.fshugo_tags.each do |table_id|
      fshugo_ids << FshugoTag.where(:tag => Fsinv.fshugo_tab.get_value(table_id)).ids.first
    end
  end
  record[:fshugo_tags] = fshugo_ids

  FileStructure.create(record)

  structitem.file_list.each { |entry| filestructure_to_db(entry) } if record[:entity_type] == "directory"
end
203
+
204
+
205
# Serialises inventory to pretty-printed JSON.
#
# The object is first rendered through its own to_json, parsed back into
# plain data and then re-generated with JSON.pretty_generate. Returns the
# resulting String; when the json library cannot be loaded a hint is printed
# and whatever had been produced so far (normally nil) is returned.
def inventory_to_json(inventory)
  output = nil
  require 'json'
  output = JSON.parse(inventory.to_json(max_nesting: 100))
  output = JSON.pretty_generate(output, :max_nesting => 100)
  output
rescue LoadError
  puts "gem 'json' needed for JSON creation. Install using 'gem install json'"
  output
end
216
+
217
+
218
# Appends the XML representation of defobj to the builder xml.
#
# A DirectoryDescription becomes a <directory> element holding path, bytes,
# both counters and a <file_list> with its children rendered recursively; a
# FileDescription becomes a <file> element holding path, bytes, mimetype and
# magicdescr. Any other object produces no output.
def filestructure_to_xml(xml, defobj)
  if defobj.is_a?(DirectoryDescription)
    xml.directory do
      xml.path(defobj.path)
      xml.bytes(defobj.bytes)
      xml.file_count(defobj.file_count)
      xml.item_count(defobj.item_count)
      xml.file_list do
        defobj.file_list.each { |entry| filestructure_to_xml(xml, entry) }
      end
    end
  elsif defobj.is_a?(FileDescription)
    xml.file do
      xml.path(defobj.path)
      xml.bytes(defobj.bytes)
      xml.mimetype(defobj.mimetype)
      xml.magicdescr(defobj.magicdescr)
    end
  end
end
241
+
242
+
243
# Serialises inventory to XML with Nokogiri::XML::Builder.
#
# The document root is <inventory>; it contains <file_structure> followed by
# one element per lookup table (magic_tab, mime_tab, osx_tab, fshugo_tab),
# each listing its entries as <item><id/><value/></item>.
# Returns the XML String; when nokogiri cannot be loaded a hint is printed
# and nil is returned.
def inventory_to_xml(inventory)
  xml_data = nil
  begin
    require 'nokogiri'
    document = Nokogiri::XML::Builder.new do |xml|
      xml.inventory do
        # output the file structure
        xml.file_structure do
          inventory.file_structure.each { |fstruct| filestructure_to_xml(xml, fstruct) }
        end

        # output the four lookup tables, in this fixed order
        %w[magic_tab mime_tab osx_tab fshugo_tab].each do |tab_name|
          xml.public_send(tab_name) do
            inventory.public_send(tab_name).val_map.each do |id, val|
              xml.item do
                xml.id(id)
                xml.value(val)
              end
            end
          end
        end
      end
    end
    xml_data = document.to_xml
  rescue LoadError
    puts "gem 'nokogiri' needed for XML creation. Install using 'gem install nokogiri'"
  end
  xml_data
end
291
+
292
+
293
# Serialises inventory to YAML via YAML.dump.
#
# Returns the YAML String; when the yaml library cannot be loaded a hint is
# printed and nil is returned.
def inventory_to_yaml(inventory)
  dumped = nil
  require 'yaml'
  dumped = YAML.dump(inventory)
  dumped
rescue LoadError
  puts "gem 'yaml' needed for YAML creation. Install using 'gem install yaml'"
  dumped
end
303
+ end # Fsinv
304
+
305
+
306
+