filecamo 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: cc867e4a005a911f06b95fc7a483ab8321a517c3
4
+ data.tar.gz: 8229c95f127d9e938ff75ea31c24a817a30099d3
5
+ SHA512:
6
+ metadata.gz: 2a1af293cb02a0e77998337a2c75e348f0578c3abcedf93c421ea7de885ec207141112f2465dc35ed52e7be08ed8d32f042002ae10c5c4a2d37b73341996d8fc
7
+ data.tar.gz: 3f3a3363e9d00aa1006e3174ff700d643449bdf13541a892ba65f274004736d1fa3ecb9fc07ff700afd8b2afe87cf741ff9580a5db37898ae0d53072d9e86952
data/README.md ADDED
@@ -0,0 +1 @@
1
+ # filecamo
data/bin/filecamo ADDED
@@ -0,0 +1,84 @@
1
#!/usr/bin/env ruby

# Command-line front end for Filecamo.
#
#   gen  - hands its arguments to Filecamo::Generator to create files
#   muck - hands its arguments to Filecamo::TextMucker to insert lines into files
#
# Any other (or missing) sub-command prints a short usage text and exits 1.

require_relative '../lib/filecamo'

# Program name used in the usage texts and in the prefix given to TextMucker.
BN = File.basename($0, '.rb')

cmd = ARGV.shift
if cmd == 'gen'
  # "-d <destination_path>" is optional; remove the flag and the value that
  # follows it so that only the positional arguments remain in ARGV.
  if i = ARGV.index('-d')
    ARGV.delete_at(i)
    dst_path = ARGV.delete_at(i)
  end

  # minimum_size, maximum_size, count and depth are all mandatory. Checking
  # for fewer than four (instead of two) prevents a missing count/depth from
  # being converted to 0 later on, which silently generated nothing.
  if ARGV.size < 4
    bin, bm = BetterBytes::BINARY
    dec, dm = BetterBytes::DECIMAL

    $stderr.puts <<EOF

usage: #{BN} gen [-d <destination_path>] <minimum_size> <maximum_size> <count> <depth> [<percent_text>]

The size MAY end with a unit label:
* BINARY labels (#{bm} multiplier): #{bin.join(', ')}
* DECIMAL labels (#{dm} multiplier): #{dec.join(', ')}

The count will determine the number of files to generate.

The depth will determine the hierarchy of the files. The number of files will be randomly
distributed among the tree.

The percent text optional value indicates how many files should be text instead of binary.
Default is none (i.e. all files have random binary content).

EOF
    exit 1
  end

  min, max, count, depth, percent_text = ARGV
  generator = Filecamo::Generator.new
  generator.generate(min, max, count, depth,
                     percent_text: percent_text, destination_path: dst_path) do |path, len|
    puts "#{BetterBytes.humanize(len, precision: 6.1)} => #{path}"
  end
  generator.wait do |txt_count, bin_count|
    # The trailing spaces overwrite leftovers of a longer previous status line.
    # print (not printf) is used so the text is never parsed as a format string.
    $stdout.print "\rWaiting for #{txt_count} txt and #{bin_count} bin jobs "\
                  "to complete..." + (' ' * 10)
    $stdout.flush
  end
  $stdout.puts
  generator.kill

elsif cmd == 'muck'
  # percent_select, percent_change and at least one path are required.
  if ARGV.size < 3
    $stderr.puts <<EOF

usage: #{BN} muck <percent_select> <percent_change> <path> [<path>...]

The percent select should be a value from 1 to 100 indicating how likely a given file will be
modified.

The percent change should be a value from 1 to 100 indicating (roughly) how many lines should
be added.

Paths can be filenames or directories to recurse and will be descended breadth-first. Any file
entry starting with '.' is skipped (ignored).

EOF
    exit 1
  end

  percent_select, percent_change, *paths = ARGV
  mucker = Filecamo::TextMucker.new(" #{BN}: ")
  mucker.muck(percent_select, percent_change, paths) do |fn, lang, line_nums|
    puts "Modified #{fn} (#{lang}): #{line_nums.join(',')}"
  end

else
  $stderr.puts <<EOF

usage: #{BN} { gen | muck } ...

EOF
  exit 1
end
data/filecamo.gempec ADDED
@@ -0,0 +1,15 @@
1
# Gem specification for filecamo.
Gem::Specification.new do |spec|
  # The version is the first quoted string found on a line of lib/filecamo.rb
  # that mentions VERSION.
  version_lines = File.readlines('lib/filecamo.rb').grep(/VERSION/) do |line|
    line.match(/'([^']+)'/)[1]
  end

  spec.name        = 'filecamo'
  spec.version     = version_lines[0]
  spec.summary     = 'File content generator and manipulator.'
  spec.description = 'Generate and manipulate entire directory trees of either binary or text file content.'
  spec.authors     = ['Brad Robel-Forrest']
  spec.email       = 'brad@bitpony.com'
  # Package every file git reports; names are NUL-separated because of -z.
  spec.files       = `git ls-files -z`.split("\x0")
  spec.executables = ['filecamo']
  spec.homepage    = 'https://github.com/bradrf/filecamo#readme'
  spec.license     = 'MIT'

  spec.add_runtime_dependency 'better_bytes', '~> 0.0.1'
  spec.add_runtime_dependency 'literate_randomizer', '~> 0.4.0'
  spec.add_runtime_dependency 'ruby-filemagic', '~> 0.7.1'
end
data/lib/filecamo.rb ADDED
@@ -0,0 +1,6 @@
1
# Top-level namespace of the filecamo gem.
module Filecamo
  # Gem version. Kept as a single-quoted literal on one line because the
  # gemspec extracts it from this file with a regular expression. Frozen so
  # the constant cannot be modified in place.
  VERSION = '0.0.1'.freeze
end
4
+
5
+ require_relative './filecamo/generator'
6
+ require_relative './filecamo/text_mucker'
@@ -0,0 +1,121 @@
1
+ require 'logger'
2
+ require 'thread'
3
+ require 'thwait'
4
+ require 'pathname'
5
+ require 'better_bytes'
6
+ require 'literate_randomizer'
7
+
8
module Filecamo
  # Creates files filled with random text or random binary content below a
  # destination directory. The content is written by background worker
  # threads that are fed through two bounded queues (one for text files, one
  # for binary files).
  class Generator
    # Largest number of random bytes a binary worker writes per call.
    BIN_CHUNK_SIZE = 32_768

    # Starts the worker threads.
    #
    # logger::      receives error reports from the worker threads.
    # txt_workers:: hash with :count (number of threads) and :queue_size
    #               (bound of the work queue) for text files.
    # bin_workers:: same as txt_workers, for binary files.
    def initialize(logger: Logger.new($stdout),
                   txt_workers: {count: 4, queue_size: 5000},
                   bin_workers: {count: 4, queue_size: 5000})
      @gen = Random.new
      @logger = logger
      @words_generated = {}

      # Jobs that were queued but are not finished yet (this includes jobs a
      # worker is currently writing). Guarded by @pending_lock.
      @pending = {txt: 0, bin: 0}
      @pending_lock = Mutex.new

      @txt_work_q = SizedQueue.new(txt_workers[:queue_size])
      @txt_workers = start_workers(:txt, @txt_work_q, txt_workers[:count]) do |file, len|
        while len > 0
          # Cut by bytes (not characters) because the remaining length is
          # tracked in bytes: IO#write returns the number of bytes written.
          line = (LiterateRandomizer.sentence + $/).byteslice(0, len)
          len -= file.write(line)
        end
      end

      @bin_work_q = SizedQueue.new(bin_workers[:queue_size])
      @bin_workers = start_workers(:bin, @bin_work_q, bin_workers[:count]) do |file, len|
        while len > 0
          len -= file.write(@gen.bytes(len < BIN_CHUNK_SIZE ? len : BIN_CHUNK_SIZE))
        end
      end
    end

    # Queues +count+ files for creation.
    #
    # min, max::         size bounds, converted with BetterBytes.dehumanize.
    # count, depth::     converted with to_i.
    # percent_text::     converted with to_f and divided by 100; the chance
    #                    that a file gets a '.txt' name and text content.
    # destination_path:: directory the files are placed under (default '.').
    #
    # Yields the Pathname and the length of every file before it is queued.
    # Raises ArgumentError when the minimum size exceeds the maximum size.
    def generate(min, max, count, depth, percent_text: 0, destination_path: nil, &block)
      min = BetterBytes.dehumanize(min)
      max = BetterBytes.dehumanize(max)
      if min > max
        raise ArgumentError, "minimum size (#{min}) is larger than maximum size (#{max})"
      end

      count = count.to_i
      depth = depth.to_i
      percent_text = percent_text.to_f / 100
      dst_pn = Pathname.new(destination_path || '.')
      feed_workers(min, max, count, depth, percent_text, dst_pn, &block)
    end

    # Blocks until every queued file has been completely written.
    #
    # While waiting, yields the number of unfinished text jobs and unfinished
    # binary jobs (queued plus in progress) once per +sleep_interval+ seconds.
    def wait(sleep_interval: 0.3)
      loop do
        txt_count, bin_count = pending_counts
        break if txt_count.zero? && bin_count.zero?
        yield(txt_count, bin_count) if block_given?
        sleep(sleep_interval)
      end
    end

    # Stops all worker threads and waits until they have terminated. Call
    # #wait first; a file that is still being written is left incomplete.
    def kill
      workers = @txt_workers + @bin_workers
      workers.each(&:kill)
      workers.each(&:join)
    end

    ######################################################################
    private

    # Starts +count+ threads that pop [length, Pathname] jobs from +queue+,
    # open the file for binary writing and yield it with the length to the
    # given block. A failing job is logged and the worker keeps running.
    def start_workers(name, queue, count)
      Array.new(count) do
        Thread.new do
          loop do
            len, fn = queue.pop
            begin
              fn.open('wb') { |file| yield file, len }
            rescue StandardError => ex
              @logger.error "#{Thread.current} failed (#{name}) writing #{fn}"
              @logger.error ex
            ensure
              # Always account for the job so that #wait cannot hang on it.
              job_done(name)
            end
          end
        end
      end
    end

    # Picks a directory, a name, a kind and a length for each of the +count+
    # files and queues them for the matching workers.
    def feed_workers(min, max, count, depth, percent_text, dst_pn)
      paths = {}

      count.times do
        kind = @gen.rand < percent_text ? :txt : :bin
        fn = dst_pn + pick_directory(paths, depth) + "#{gen_name}.#{kind}"
        fn.parent.mkpath
        len = @gen.rand(min..max).round

        yield(fn, len) if block_given?

        enqueue(kind, kind == :txt ? @txt_work_q : @bin_work_q, len, fn)
      end
    end

    # Returns the relative directory (a Pathname) for the next file. +paths+
    # remembers the directory created for each level so later files reuse it.
    def pick_directory(paths, depth)
      # Random#rand rejects a zero or negative limit; treat that as "no
      # sub-directories".
      level = depth > 0 ? @gen.rand(depth) : 0
      return Pathname.new('') if level < 1

      known = paths[level]
      return known[@gen.rand(known.size)] if known

      pn = Pathname.new(gen_name)
      level.times { pn += gen_name }
      (paths[level] = []) << pn
      pn
    end

    # Counts the job as pending and hands it to the workers. The counter is
    # raised before the push so a fast worker cannot finish the job first.
    def enqueue(kind, queue, len, fn)
      @pending_lock.synchronize { @pending[kind] += 1 }
      queue.push([len, fn])
    end

    # Marks one job of the given kind as finished.
    def job_done(kind)
      @pending_lock.synchronize { @pending[kind] -= 1 }
    end

    # Returns [unfinished text jobs, unfinished binary jobs].
    def pending_counts
      @pending_lock.synchronize { [@pending[:txt], @pending[:bin]] }
    end

    # Returns a random word. The spelling first seen for a word is reused for
    # every later occurrence so the same capitalization is always used.
    def gen_name
      word = LiterateRandomizer.word
      @words_generated[word.downcase] ||= word
    end
  end
end
@@ -0,0 +1,139 @@
1
+ require 'logger'
2
+ require 'filemagic'
3
+ require 'literate_randomizer'
4
+
5
module Filecamo
  # Inserts randomly generated comment lines into existing text files.
  class TextMucker
    # Files larger than this many bytes are skipped.
    MAX_FILE_SIZE = 128 * 1024

    # Line-comment marker for every language lines can be generated for.
    LANG_MARKS = {
      csharp: '//',
      python: '#',
      ruby:   '#',
      shell:  '#',
      js:     '//',
      plain:  '#',
    }.freeze

    # Language implied by a file name extension (as returned by File.extname).
    EXT_LANGS = {
      '.cs'   => :csharp,
      '.py'   => :python,
      '.js'   => :js,
      '.json' => :json,
      '.yaml' => :yaml,
      '.meta' => :yaml,
      '.html' => :html,
      '.txt'  => :plain,
    }.freeze

    # todo: fix parsable types, something like the following
    #       (figure out how to insert, perhaps deep iterator and counters?)
    #       (figure out types to insert, (i.e. would be bad to insert string into number array))
    LANG_PARSERS = {
      json: ->(fn){}
    }

    # comment_prefix:: text placed between the comment marker and the random
    #                  sentence of every inserted line.
    # logger::         receives a debug message for every skipped file.
    def initialize(comment_prefix, logger: Logger.new($stdout))
      # Build new strings so that the strings stored in LANG_MARKS are never
      # modified (they are shared by all instances).
      @marks = LANG_MARKS.each_with_object({}) do |(lang, mark), marks|
        marks[lang] = mark + comment_prefix
      end
      @logger = logger
      @magic = FileMagic.new
      @mime = FileMagic.mime
    end

    # Walks +paths+ breadth-first and inserts random comment lines into the
    # selected files. The given array is not modified.
    #
    # percent_select:: chance (0-100) that a candidate file is modified.
    # percent_lines::  roughly how many bytes of new lines are added,
    #                  relative to the size of the file (0-100).
    # paths::          files and directories; directories are descended.
    #
    # Entries whose name starts with '.' are ignored. Files larger than
    # MAX_FILE_SIZE, files of an unrecognized type and files of a language
    # without a comment marker are skipped.
    #
    # Yields the file name, the language symbol and the line numbers of the
    # inserted lines for every file that was rewritten.
    def muck(percent_select, percent_lines, paths)
      select_chance = percent_select.to_f / 100
      lines_chance = percent_lines.to_f / 100

      pending = paths.dup
      until pending.empty?
        fn = pending.shift
        next if hidden?(fn)

        if File.directory?(fn)
          pending.concat(visible_children(fn))
          next
        end

        fn_size = File.size(fn)

        # todo: support working with large files by reading next line
        if fn_size > MAX_FILE_SIZE
          @logger.debug "Skipping #{fn} by size: #{fn_size}"
          next
        end

        lang = detect_language(fn) or next

        # Without a marker no line can be generated, so the requested amount
        # of new bytes could never be reached.
        unless @marks.key?(lang)
          @logger.debug "Skipping #{fn}: no comment marker for #{lang}"
          next
        end

        if Random.rand > select_chance
          @logger.debug "Skipping #{fn} by chance"
          next
        end

        new_lines = plan_new_lines(fn_size, lines_chance, lang)
        body, line_nums = merge_new_lines(fn, new_lines)

        # todo: use same charset as mime type indicates when writing!
        File.open(fn, 'wb') { |f| f.write(body) }

        yield(fn, lang, line_nums) if block_given?
      end
    end

    ######################################################################
    private

    # True when the last component of +path+ starts with '.', except for the
    # directory references '.' and '..' themselves.
    def hidden?(path)
      name = File.basename(path)
      name.start_with?('.') && name != '.' && name != '..'
    end

    # Paths of all entries of +dir+ whose name does not start with '.'.
    def visible_children(dir)
      Dir.entries(dir).reject { |e| e.start_with?('.') }.map { |e| File.join(dir, e) }
    end

    # Returns the language symbol for +fn+, from the extension or else from
    # the FileMagic descriptions; logs and returns nil for unsupported types.
    def detect_language(fn)
      by_ext = EXT_LANGS[File.extname(fn)]
      return by_ext if by_ext

      mime = @mime.file(fn)
      case mime
      when /python/ then :python
      when /ruby/   then :ruby
      when /shell/  then :shell
      when /plain/
        case @magic.file(fn)
        when /python/ then :python
        when /ruby/   then :ruby
        when /node/   then :js
        else :plain
        end
      else
        @logger.debug "Skipping #{fn} by mime type: #{mime}"
        nil
      end
    end

    # Chooses random byte offsets and generates a line for each until about
    # fn_size * lines_chance bytes are planned. Returns [offset, line] pairs
    # sorted by offset; a repeated offset replaces the earlier line.
    def plan_new_lines(fn_size, lines_chance, lang)
      planned = {}
      bytes_needed = (fn_size * lines_chance).floor
      while bytes_needed > 0
        new_line = get_line_for(lang)
        planned[Random.rand(fn_size)] = new_line
        bytes_needed -= new_line.bytesize
      end
      planned.sort
    end

    # Reads +fn+ line by line and adds each planned line after the first
    # original line that ends at or beyond its offset (at most one per
    # original line); leftovers are appended at the end. Returns the new
    # content and the line numbers of the inserted lines.
    def merge_new_lines(fn, new_lines)
      body = ''.dup
      line_nums = []
      line_num = 0

      File.open(fn) do |file|
        file.each_line do |line|
          body << line
          line_num += 1
          next if new_lines.empty? # read remainder of file

          if file.pos >= new_lines[0][0] # add a line as soon as passed the offset
            _offset, new_line = new_lines.shift
            line_nums << (line_num += 1)
            body << new_line
          end
        end
      end

      # concat any remaining lines
      unless new_lines.empty?
        body << $/ unless body[-1] == $/
        new_lines.each do |_offset, new_line|
          line_nums << (line_num += 1)
          body << new_line
        end
      end

      [body, line_nums]
    end

    # Returns a comment line with a random sentence for +lang+, or '' when
    # the language has no comment marker.
    def get_line_for(lang)
      mark = @marks[lang]
      return '' unless mark
      # todo: match line endings of file!
      mark + LiterateRandomizer.sentence + $/
    end
  end
end
metadata ADDED
@@ -0,0 +1,93 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: filecamo
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Brad Robel-Forrest
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-08-01 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: better_bytes
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 0.0.1
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: 0.0.1
27
+ - !ruby/object:Gem::Dependency
28
+ name: literate_randomizer
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 0.4.0
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 0.4.0
41
+ - !ruby/object:Gem::Dependency
42
+ name: ruby-filemagic
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: 0.7.1
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: 0.7.1
55
+ description: Generate and manipulate entire directory trees of either binary or text
56
+ file content.
57
+ email: brad@bitpony.com
58
+ executables:
59
+ - filecamo
60
+ extensions: []
61
+ extra_rdoc_files: []
62
+ files:
63
+ - README.md
64
+ - bin/filecamo
65
+ - filecamo.gempec
66
+ - lib/filecamo.rb
67
+ - lib/filecamo/generator.rb
68
+ - lib/filecamo/text_mucker.rb
69
+ homepage: https://github.com/bradrf/filecamo#readme
70
+ licenses:
71
+ - MIT
72
+ metadata: {}
73
+ post_install_message:
74
+ rdoc_options: []
75
+ require_paths:
76
+ - lib
77
+ required_ruby_version: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - ">="
80
+ - !ruby/object:Gem::Version
81
+ version: '0'
82
+ required_rubygems_version: !ruby/object:Gem::Requirement
83
+ requirements:
84
+ - - ">="
85
+ - !ruby/object:Gem::Version
86
+ version: '0'
87
+ requirements: []
88
+ rubyforge_project:
89
+ rubygems_version: 2.5.1
90
+ signing_key:
91
+ specification_version: 4
92
+ summary: File content generator and manipulator.
93
+ test_files: []