filecamo 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: cc867e4a005a911f06b95fc7a483ab8321a517c3
4
+ data.tar.gz: 8229c95f127d9e938ff75ea31c24a817a30099d3
5
+ SHA512:
6
+ metadata.gz: 2a1af293cb02a0e77998337a2c75e348f0578c3abcedf93c421ea7de885ec207141112f2465dc35ed52e7be08ed8d32f042002ae10c5c4a2d37b73341996d8fc
7
+ data.tar.gz: 3f3a3363e9d00aa1006e3174ff700d643449bdf13541a892ba65f274004736d1fa3ecb9fc07ff700afd8b2afe87cf741ff9580a5db37898ae0d53072d9e86952
data/README.md ADDED
@@ -0,0 +1 @@
1
+ # filecamo
data/bin/filecamo ADDED
@@ -0,0 +1,84 @@
1
#!/usr/bin/env ruby
#
# Command-line front end for Filecamo.
#
#   gen  - generate a tree of random text/binary files (Filecamo::Generator)
#   muck - insert random comment lines into existing text files
#          (Filecamo::TextMucker)
#
# Any other (or missing) sub-command prints a short usage message to stderr
# and exits with status 1.

require_relative '../lib/filecamo'

# Program name shown in usage text and embedded in the comment prefix that
# `muck` writes into files.
BN = File.basename($0, '.rb')

cmd = ARGV.shift
if cmd == 'gen'
  # Pull the optional "-d <destination_path>" pair out of ARGV so only the
  # positional arguments remain. The second delete_at removes the value that
  # slid into the flag's slot (nil when -d was the last argument).
  if i = ARGV.index('-d')
    ARGV.delete_at(i)
    dst_path = ARGV.delete_at(i)
  end

  # minimum_size, maximum_size, count and depth are all required; only
  # percent_text is optional. Fewer than four would otherwise be silently
  # coerced from nil to 0 further down.
  if ARGV.size < 4
    # NOTE(review): assumes each BetterBytes constant is a [labels, multiplier]
    # pair -- confirm against the better_bytes gem.
    bin, bm = BetterBytes::BINARY
    dec, dm = BetterBytes::DECIMAL

    $stderr.puts <<EOF

usage: #{BN} gen [-d <destination_path>] <minimum_size> <maximum_size> <count> <depth> [<percent_text>]

The size MAY end with a unit label:
* BINARY labels (#{bm} multiplier): #{bin.join(', ')}
* DECIMAL labels (#{dm} multiplier): #{dec.join(', ')}

The count will determine the number of files to generate.

The depth will determine the hierarchy of the files. The number of files will be randomly
distributed among the tree.

The percent text optional value indicates how many files should be text instead of binary.
Default is none (i.e. all files have random binary content).

EOF
    exit 1
  end

  min, max, count, depth, percent_text = ARGV
  generator = Filecamo::Generator.new
  # Report every file as it is queued for writing.
  generator.generate(min, max, count, depth,
                     percent_text: percent_text, destination_path: dst_path) do |path, len|
    puts "#{BetterBytes.humanize(len, precision: 6.1)} => #{path}"
  end
  # Redraw a single status line (carriage return, no newline) until the
  # generator reports that it is done. `print` is used rather than `printf`
  # because the text is already interpolated and is not a format string.
  generator.wait do |txt_count, bin_count|
    $stdout.print "\rWaiting for #{txt_count} txt and #{bin_count} bin jobs "\
                  "to complete..." + (' ' * 10)
    $stdout.flush
  end
  $stdout.puts
  generator.kill

elsif cmd == 'muck'
  if ARGV.size < 3
    $stderr.puts <<EOF

usage: #{BN} muck <percent_select> <percent_change> <path> [<path>...]

The percent select should be a value from 1 to 100 indicating how likely a given file will be
modified.

The percent change should be a value from 1 to 100 indicating (roughly) how many lines should
be added.

Paths can be filenames or directories to recurse and will be descended breadth-first. Any file
entry starting with '.' is skipped (ignored).

EOF
    exit 1
  end

  percent_select, percent_change, *paths = ARGV
  # Every inserted comment line is tagged with the program name.
  mucker = Filecamo::TextMucker.new(" #{BN}: ")
  mucker.muck(percent_select, percent_change, paths) do |fn, lang, line_nums|
    puts "Modified #{fn} (#{lang}): #{line_nums.join(',')}"
  end

else
  $stderr.puts <<EOF

usage: #{BN} { gen | muck } ...

EOF
  exit 1
end
data/filecamo.gempec ADDED
@@ -0,0 +1,15 @@
1
# Gem specification for filecamo.
#
# The release number is not duplicated here: it is scraped from
# lib/filecamo.rb by taking the single-quoted literal found on each line
# that mentions VERSION and using the first one.
Gem::Specification.new do |spec|
  version_lines = File.readlines('lib/filecamo.rb').select { |line| line =~ /VERSION/ }
  quoted_values = version_lines.map { |line| line.match(/'([^']+)'/)[1] }

  spec.name        = 'filecamo'
  spec.version     = quoted_values.first
  spec.summary     = 'File content generator and manipulator.'
  spec.description = 'Generate and manipulate entire directory trees of either binary or text file content.'
  spec.authors     = ['Brad Robel-Forrest']
  spec.email       = 'brad@bitpony.com'
  spec.homepage    = 'https://github.com/bradrf/filecamo#readme'
  spec.license     = 'MIT'

  # Package exactly what git tracks; -z makes the listing NUL-separated.
  spec.files       = `git ls-files -z`.split("\x0")
  spec.executables = ['filecamo']

  spec.add_runtime_dependency 'better_bytes', '~> 0.0.1'
  spec.add_runtime_dependency 'literate_randomizer', '~> 0.4.0'
  spec.add_runtime_dependency 'ruby-filemagic', '~> 0.7.1'
end
data/lib/filecamo.rb ADDED
@@ -0,0 +1,6 @@
1
# Top-level namespace for the filecamo gem.
module Filecamo
  # Release number of the gem. Frozen so it cannot be modified in place.
  # The gemspec scrapes the quoted literal from the line below, so keep it
  # as a single-quoted string on the same line as the constant name.
  VERSION = '0.0.1'.freeze
end
4
+
5
+ require_relative './filecamo/generator'
6
+ require_relative './filecamo/text_mucker'
@@ -0,0 +1,121 @@
1
+ require 'logger'
2
+ require 'thread'
3
+ require 'thwait'
4
+ require 'pathname'
5
+ require 'better_bytes'
6
+ require 'literate_randomizer'
7
+
8
module Filecamo
  # Generates files of random size filled with either random sentences
  # (".txt") or random bytes (".bin").
  #
  # Two pools of worker threads are started on construction, one per content
  # type, each fed through its own SizedQueue. #generate enqueues the work,
  # #wait blocks until the workers have finished it and #kill stops the
  # threads.
  class Generator
    # Largest number of random bytes a binary worker produces per write.
    BIN_CHUNK_SIZE = 32_768

    # @param logger [Logger] receives a report for every file a worker fails
    #   to write
    # @param txt_workers [Hash] :count (threads) and :queue_size (maximum
    #   pending jobs) for the text pool
    # @param bin_workers [Hash] same keys, for the binary pool
    def initialize(logger: Logger.new($stdout),
                   txt_workers: {count: 4, queue_size: 5000},
                   bin_workers: {count: 4, queue_size: 5000})
      @gen = Random.new
      @logger = logger
      @words_generated = {}

      @txt_work_q = SizedQueue.new(txt_workers[:queue_size])
      @txt_workers = start_workers(:txt, @txt_work_q, txt_workers[:count]) do |file, len|
        while len > 0
          line = LiterateRandomizer.sentence + $/
          # Files are opened in binary mode and IO#write reports bytes, so
          # the final line is trimmed by bytes to hit the requested size.
          line = line.byteslice(0, len) if line.bytesize > len
          len -= file.write(line)
        end
      end

      @bin_work_q = SizedQueue.new(bin_workers[:queue_size])
      @bin_workers = start_workers(:bin, @bin_work_q, bin_workers[:count]) do |file, len|
        while len > 0
          len -= file.write(@gen.bytes(len < BIN_CHUNK_SIZE ? len : BIN_CHUNK_SIZE))
        end
      end
    end

    # Queue +count+ files for creation beneath +destination_path+.
    #
    # @param min [String, Numeric] smallest file size, passed through
    #   BetterBytes.dehumanize
    # @param max [String, Numeric] largest file size, passed through
    #   BetterBytes.dehumanize
    # @param count [#to_i] number of files to create
    # @param depth [#to_i] upper bound (exclusive) for the randomly chosen
    #   directory level of each file; values below 1 place every file
    #   directly in the destination
    # @param percent_text [#to_f] 0-100, share of files that get text content
    # @param destination_path [String, nil] root directory (default: '.')
    # @yield [path, len] the Pathname and byte length of each file, just
    #   before it is queued
    def generate(min, max, count, depth, percent_text: 0, destination_path: nil, &block)
      min = BetterBytes.dehumanize(min)
      max = BetterBytes.dehumanize(max)
      count = count.to_i
      depth = depth.to_i
      percent_text = percent_text.to_f / 100
      dst_pn = Pathname.new(destination_path || '.')
      feed_workers(min, max, count, depth, percent_text, dst_pn, &block)
    end

    # Block until all queued work has been written.
    #
    # An empty queue alone is not enough: a worker may have popped the last
    # job and still be writing it. The pools count as done only when their
    # queues are empty AND every live worker is parked in Queue#pop again.
    #
    # @param sleep_interval [Numeric] seconds to sleep between checks
    # @yield [txt_pending, bin_pending] current queue sizes, once per check
    def wait(sleep_interval: 0.3)
      until pool_idle?(@txt_work_q, @txt_workers) && pool_idle?(@bin_work_q, @bin_workers)
        yield(@txt_work_q.size, @bin_work_q.size) if block_given?
        sleep(sleep_interval)
      end
    end

    # Kill every worker thread and wait for each to terminate. Plain
    # Thread#join is used so nothing outside core Thread is needed.
    def kill
      threads = @txt_workers + @bin_workers
      threads.each(&:kill)
      threads.each(&:join)
    end

    ######################################################################
    private

    # True when +queue+ holds no jobs and no worker is in the middle of one.
    # A pool without any live worker is reported idle so #wait cannot spin
    # forever on jobs nobody is left to process.
    def pool_idle?(queue, workers)
      alive = workers.count(&:alive?)
      return true if alive.zero?
      queue.empty? && queue.num_waiting >= alive
    end

    # Start +count+ threads that pop [len, pathname] jobs from +queue+, open
    # the file for binary writing and hand (file, len) to the given block.
    #
    # A failure on one file is logged and the worker moves on to the next
    # job, so a single unwritable path does not shrink the pool.
    def start_workers(name, queue, count, &work)
      Array.new(count) do
        Thread.new do
          loop do
            len, fn = queue.pop
            begin
              fn.open('wb') { |file| work.call(file, len) }
            rescue StandardError => ex
              @logger.error "#{Thread.current} failed (#{name}) writing #{fn}"
              @logger.error ex
            end
          end
        end
      end
    end

    # Choose a directory, a name and a size for each of +count+ files, create
    # the directory and push the job onto the matching queue.
    def feed_workers(min, max, count, depth, percent_text, dst_pn)
      paths = {} # directory level => directories already created for it

      count.times do
        # Random#rand rejects 0, so a depth below 1 simply means "flat".
        d = depth > 0 ? @gen.rand(depth) : 0
        if d < 1
          pn = Pathname.new('')
        else
          # Reuse a directory already made for this level when there is one.
          known = paths[d]
          pn = known && known[@gen.rand(known.size)]
          unless pn
            pn = Pathname.new(gen_name)
            d.times { pn += gen_name }
            (paths[d] ||= []) << pn
          end
        end

        text = @gen.rand <= percent_text
        fn = dst_pn + pn + (gen_name + (text ? '.txt' : '.bin'))
        fn.parent.mkpath
        len = @gen.rand(min..max).round

        yield(fn, len) if block_given?

        (text ? @txt_work_q : @bin_work_q).push([len, fn])
      end
    end

    # Return a random word for use as a file or directory name.
    #
    # Guarantees the same capitalization is used for the same word: the first
    # spelling seen for a given lower-cased word is returned every time.
    # (Presumably to avoid clashes on case-insensitive file systems.)
    def gen_name
      word = LiterateRandomizer.word
      @words_generated[word.downcase] ||= word
    end
  end
end
@@ -0,0 +1,139 @@
1
+ require 'logger'
2
+ require 'filemagic'
3
+ require 'literate_randomizer'
4
+
5
module Filecamo
  # Inserts randomly generated comment lines into existing text files.
  #
  # Files are classified by extension first and by libmagic (FileMagic)
  # second; only languages with a known comment marker are modified.
  class TextMucker
    # Files larger than this many bytes are skipped.
    MAX_FILE_SIZE = 128 * 1024

    # Comment marker per language. Languages missing here are never modified.
    LANG_MARKS = {
      csharp: '//',
      python: '#',
      ruby: '#',
      shell: '#',
      js: '//',
      plain: '#',
    }.freeze

    # File extension => language.
    # NOTE(review): '.meta' is assumed to be the intent of the former bare
    # 'meta' entry, which could never equal a File.extname result -- confirm.
    EXTENSION_LANGS = {
      '.cs' => :csharp,
      '.py' => :python,
      '.js' => :js,
      '.json' => :json,
      '.yaml' => :yaml,
      '.meta' => :yaml,
      '.html' => :html,
      '.txt' => :plain,
    }.freeze

    # todo: fix parsable types, something like the following
    #       (figure out how to insert, perhaps deep iterator and counters?)
    #       (figure out types to insert, (i.e. would be bad to insert string into number array))
    LANG_PARSERS = {
      json: ->(fn){}
    }

    # @param comment_prefix [String] text placed after the comment marker on
    #   every inserted line
    # @param logger [Logger] receives debug messages about skipped files
    def initialize(comment_prefix, logger: Logger.new($stdout))
      # Build new strings instead of appending to the ones held by the
      # constant; otherwise each instance would lengthen the shared markers.
      @marks = LANG_MARKS.each_with_object({}) do |(lang, mark), marks|
        marks[lang] = mark + comment_prefix
      end
      @logger = logger
      @magic = FileMagic.new
      @mime = FileMagic.mime
    end

    # Walk +paths+ breadth-first and insert comment lines into a random
    # selection of the text files found.
    #
    # Entries whose name starts with '.' are skipped, except '.' and '..'
    # themselves, so paths such as './src' work as expected. The array passed
    # in is not modified.
    #
    # @param percent_select [#to_f] 0-100, chance that a candidate file is
    #   modified
    # @param percent_lines [#to_f] roughly how many bytes of new lines to add,
    #   as a percentage of the file size
    # @param paths [Array<String>] files and/or directories
    # @yield [fn, lang, line_nums] for each rewritten file: its path, the
    #   detected language and the 1-based numbers of the inserted lines
    # @return [Array<String>] the +paths+ argument
    def muck(percent_select, percent_lines, paths)
      select_chance = percent_select.to_f / 100
      lines_chance = percent_lines.to_f / 100

      pending = paths.dup
      until pending.empty?
        path = pending.shift
        next if hidden?(path)

        if File.directory?(path)
          children = Dir.entries(path).reject { |entry| entry.start_with?('.') }
          pending.concat(children.map { |entry| File.join(path, entry) })
          next
        end

        fn = path
        fn_size = File.size(fn)

        # todo: support working with large files by reading next line
        if fn_size > MAX_FILE_SIZE
          @logger.debug "Skipping #{fn} by size: #{fn_size}"
          next
        end

        lang = detect_lang(fn) or next

        # Without a marker no line can be generated, so nothing could ever
        # be inserted into this file.
        unless @marks.key?(lang)
          @logger.debug "Skipping #{fn}: no comment marker for #{lang}"
          next
        end

        if Random.rand > select_chance
          @logger.debug "Skipping #{fn} by chance"
          next
        end

        insertions = plan_insertions(lang, fn_size, lines_chance)
        body, line_nums = splice(fn, insertions)

        # todo: use same charset as mime type indicates when writing!
        File.open(fn, 'wb') { |f| f.write(body) }

        yield(fn, lang, line_nums) if block_given?
      end

      paths
    end

    ######################################################################
    private

    # True for dot-prefixed names other than the '.' and '..' directories.
    def hidden?(path)
      name = File.basename(path)
      name.start_with?('.') && name != '.' && name != '..'
    end

    # Determine the language of +fn+: by extension when known, otherwise from
    # the MIME type and, for plain text, the libmagic description.
    # Logs and returns nil when the MIME type is not a recognised text type.
    def detect_lang(fn)
      by_ext = EXTENSION_LANGS[File.extname(fn)]
      return by_ext if by_ext

      case mime = @mime.file(fn)
      when /python/ then :python
      when /ruby/ then :ruby
      when /shell/ then :shell
      when /plain/
        case @magic.file(fn)
        when /python/ then :python
        when /ruby/ then :ruby
        when /node/ then :js
        else :plain
        end
      else
        @logger.debug "Skipping #{fn} by mime type: #{mime}"
        nil
      end
    end

    # Pick random byte offsets and generate a line for each until roughly
    # fn_size * lines_chance bytes of new content are planned.
    # Returns [[offset, line], ...] sorted by offset.
    def plan_insertions(lang, fn_size, lines_chance)
      planned = {}
      bytes_needed = (fn_size * lines_chance).floor
      while bytes_needed > 0
        line = get_line_for(lang)
        break if line.empty? # nothing to add; never loop without progress
        planned[Random.rand(fn_size)] = line
        bytes_needed -= line.bytesize
      end
      planned.sort
    end

    # Read +fn+ line by line and weave +insertions+ in: at most one new line
    # after each original line, as soon as the bytes read reach the planned
    # offset. Whatever is left over is appended at the end.
    # Returns [new_body, inserted_line_numbers].
    def splice(fn, insertions)
      body = ''.dup
      line_nums = []
      line_num = 0
      consumed = 0

      File.open(fn) do |file|
        file.each_line do |line|
          body << line
          consumed += line.bytesize
          line_num += 1
          next if insertions.empty? # read remainder of file

          offset = insertions.first.first
          next unless consumed >= offset # add a line as soon as passed the offset

          _, new_line = insertions.shift
          # The last line of a file may lack a terminator; do not glue the
          # inserted line onto it.
          body << $/ unless body.end_with?($/)
          line_nums << (line_num += 1)
          body << new_line
        end
      end

      # concat any remaining lines
      unless insertions.empty?
        body << $/ unless body.end_with?($/)
        insertions.each do |_, new_line|
          line_num += 1
          line_nums << line_num
          body << new_line
        end
      end

      [body, line_nums]
    end

    # Build one comment line for +lang+, or '' when the language has no
    # comment marker.
    def get_line_for(lang)
      mark = @marks[lang] or return ''
      # todo: match line endings of file!
      mark + LiterateRandomizer.sentence + $/
    end
  end
end
metadata ADDED
@@ -0,0 +1,93 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: filecamo
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Brad Robel-Forrest
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-08-01 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: better_bytes
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 0.0.1
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: 0.0.1
27
+ - !ruby/object:Gem::Dependency
28
+ name: literate_randomizer
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 0.4.0
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 0.4.0
41
+ - !ruby/object:Gem::Dependency
42
+ name: ruby-filemagic
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: 0.7.1
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: 0.7.1
55
+ description: Generate and manipulate entire directory trees of either binary or text
56
+ file content.
57
+ email: brad@bitpony.com
58
+ executables:
59
+ - filecamo
60
+ extensions: []
61
+ extra_rdoc_files: []
62
+ files:
63
+ - README.md
64
+ - bin/filecamo
65
+ - filecamo.gempec
66
+ - lib/filecamo.rb
67
+ - lib/filecamo/generator.rb
68
+ - lib/filecamo/text_mucker.rb
69
+ homepage: https://github.com/bradrf/filecamo#readme
70
+ licenses:
71
+ - MIT
72
+ metadata: {}
73
+ post_install_message:
74
+ rdoc_options: []
75
+ require_paths:
76
+ - lib
77
+ required_ruby_version: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - ">="
80
+ - !ruby/object:Gem::Version
81
+ version: '0'
82
+ required_rubygems_version: !ruby/object:Gem::Requirement
83
+ requirements:
84
+ - - ">="
85
+ - !ruby/object:Gem::Version
86
+ version: '0'
87
+ requirements: []
88
+ rubyforge_project:
89
+ rubygems_version: 2.5.1
90
+ signing_key:
91
+ specification_version: 4
92
+ summary: File content generator and manipulator.
93
+ test_files: []