rbbt-util 5.44.1 → 6.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (175)
  1. checksums.yaml +4 -4
  2. data/LICENSE +1 -1
  3. data/bin/rbbt +67 -90
  4. data/bin/rbbt_exec.rb +2 -2
  5. data/etc/app.d/base.rb +2 -2
  6. data/etc/app.d/semaphores.rb +3 -3
  7. data/lib/rbbt/annotations/annotated_array.rb +207 -207
  8. data/lib/rbbt/annotations/refactor.rb +27 -0
  9. data/lib/rbbt/annotations/util.rb +282 -282
  10. data/lib/rbbt/annotations.rb +343 -320
  11. data/lib/rbbt/association/database.rb +200 -225
  12. data/lib/rbbt/association/index.rb +294 -291
  13. data/lib/rbbt/association/item.rb +227 -227
  14. data/lib/rbbt/association/open.rb +35 -34
  15. data/lib/rbbt/association/util.rb +0 -169
  16. data/lib/rbbt/association.rb +2 -4
  17. data/lib/rbbt/entity/identifiers.rb +119 -118
  18. data/lib/rbbt/entity/refactor.rb +12 -0
  19. data/lib/rbbt/entity.rb +319 -315
  20. data/lib/rbbt/hpc/batch.rb +72 -53
  21. data/lib/rbbt/hpc/lsf.rb +2 -2
  22. data/lib/rbbt/hpc/orchestrate/batches.rb +2 -2
  23. data/lib/rbbt/hpc/orchestrate/chains.rb +25 -5
  24. data/lib/rbbt/hpc/orchestrate/rules.rb +2 -2
  25. data/lib/rbbt/hpc/orchestrate.rb +19 -13
  26. data/lib/rbbt/hpc/slurm.rb +18 -18
  27. data/lib/rbbt/knowledge_base/entity.rb +13 -5
  28. data/lib/rbbt/knowledge_base/query.rb +2 -2
  29. data/lib/rbbt/knowledge_base/registry.rb +32 -31
  30. data/lib/rbbt/knowledge_base/traverse.rb +1 -1
  31. data/lib/rbbt/knowledge_base.rb +1 -1
  32. data/lib/rbbt/monitor.rb +36 -25
  33. data/lib/rbbt/persist/refactor.rb +166 -0
  34. data/lib/rbbt/persist/tsv/tokyocabinet.rb +105 -105
  35. data/lib/rbbt/persist/tsv.rb +187 -185
  36. data/lib/rbbt/persist.rb +556 -551
  37. data/lib/rbbt/refactor.rb +20 -0
  38. data/lib/rbbt/resource/path/refactor.rb +178 -0
  39. data/lib/rbbt/resource/path.rb +317 -497
  40. data/lib/rbbt/resource/util.rb +0 -48
  41. data/lib/rbbt/resource.rb +3 -390
  42. data/lib/rbbt/tsv/accessor.rb +2 -838
  43. data/lib/rbbt/tsv/attach.rb +303 -299
  44. data/lib/rbbt/tsv/change_id.rb +244 -245
  45. data/lib/rbbt/tsv/csv.rb +87 -85
  46. data/lib/rbbt/tsv/dumper.rb +2 -100
  47. data/lib/rbbt/tsv/excel.rb +26 -24
  48. data/lib/rbbt/tsv/field_index.rb +4 -1
  49. data/lib/rbbt/tsv/filter.rb +3 -2
  50. data/lib/rbbt/tsv/index.rb +2 -284
  51. data/lib/rbbt/tsv/manipulate.rb +750 -747
  52. data/lib/rbbt/tsv/marshal.rb +3 -3
  53. data/lib/rbbt/tsv/matrix.rb +2 -2
  54. data/lib/rbbt/tsv/parallel/through.rb +2 -1
  55. data/lib/rbbt/tsv/parallel/traverse.rb +783 -781
  56. data/lib/rbbt/tsv/parser.rb +678 -678
  57. data/lib/rbbt/tsv/refactor.rb +195 -0
  58. data/lib/rbbt/tsv/stream.rb +253 -251
  59. data/lib/rbbt/tsv/util.rb +420 -420
  60. data/lib/rbbt/tsv.rb +210 -208
  61. data/lib/rbbt/util/R/eval.rb +4 -4
  62. data/lib/rbbt/util/R/plot.rb +62 -166
  63. data/lib/rbbt/util/R.rb +21 -18
  64. data/lib/rbbt/util/cmd.rb +2 -318
  65. data/lib/rbbt/util/color.rb +269 -269
  66. data/lib/rbbt/util/colorize.rb +89 -89
  67. data/lib/rbbt/util/concurrency/processes/refactor.rb +22 -0
  68. data/lib/rbbt/util/concurrency/processes/worker.rb +2 -2
  69. data/lib/rbbt/util/concurrency/processes.rb +389 -386
  70. data/lib/rbbt/util/config.rb +169 -167
  71. data/lib/rbbt/util/filecache.rb +1 -1
  72. data/lib/rbbt/util/iruby.rb +20 -0
  73. data/lib/rbbt/util/log/progress/report.rb +241 -241
  74. data/lib/rbbt/util/log/progress/util.rb +99 -99
  75. data/lib/rbbt/util/log/progress.rb +102 -102
  76. data/lib/rbbt/util/log/refactor.rb +49 -0
  77. data/lib/rbbt/util/log.rb +486 -532
  78. data/lib/rbbt/util/migrate.rb +2 -2
  79. data/lib/rbbt/util/misc/concurrent_stream.rb +248 -246
  80. data/lib/rbbt/util/misc/development.rb +12 -11
  81. data/lib/rbbt/util/misc/exceptions.rb +117 -112
  82. data/lib/rbbt/util/misc/format.rb +2 -230
  83. data/lib/rbbt/util/misc/indiferent_hash.rb +2 -107
  84. data/lib/rbbt/util/misc/inspect.rb +2 -476
  85. data/lib/rbbt/util/misc/lock.rb +109 -106
  86. data/lib/rbbt/util/misc/omics.rb +9 -1
  87. data/lib/rbbt/util/misc/pipes.rb +765 -793
  88. data/lib/rbbt/util/misc/refactor.rb +20 -0
  89. data/lib/rbbt/util/misc/ssw.rb +27 -17
  90. data/lib/rbbt/util/misc/system.rb +92 -105
  91. data/lib/rbbt/util/misc.rb +39 -20
  92. data/lib/rbbt/util/named_array/refactor.rb +4 -0
  93. data/lib/rbbt/util/named_array.rb +3 -220
  94. data/lib/rbbt/util/open/refactor.rb +7 -0
  95. data/lib/rbbt/util/open.rb +3 -857
  96. data/lib/rbbt/util/procpath.rb +6 -6
  97. data/lib/rbbt/util/python/paths.rb +27 -0
  98. data/lib/rbbt/util/python/run.rb +115 -0
  99. data/lib/rbbt/util/python/script.rb +110 -0
  100. data/lib/rbbt/util/python/util.rb +3 -3
  101. data/lib/rbbt/util/python.rb +22 -81
  102. data/lib/rbbt/util/semaphore.rb +152 -148
  103. data/lib/rbbt/util/simpleopt.rb +9 -8
  104. data/lib/rbbt/util/ssh/refactor.rb +19 -0
  105. data/lib/rbbt/util/ssh.rb +122 -118
  106. data/lib/rbbt/util/tar.rb +117 -115
  107. data/lib/rbbt/util/tmpfile.rb +69 -67
  108. data/lib/rbbt/util/version.rb +2 -0
  109. data/lib/rbbt/workflow/refactor/entity.rb +11 -0
  110. data/lib/rbbt/workflow/refactor/export.rb +66 -0
  111. data/lib/rbbt/workflow/refactor/inputs.rb +24 -0
  112. data/lib/rbbt/workflow/refactor/recursive.rb +64 -0
  113. data/lib/rbbt/workflow/refactor/task_info.rb +66 -0
  114. data/lib/rbbt/workflow/refactor.rb +150 -0
  115. data/lib/rbbt/workflow/remote_workflow/driver/rest.rb +1 -2
  116. data/lib/rbbt/workflow/remote_workflow/driver/ssh.rb +55 -32
  117. data/lib/rbbt/workflow/remote_workflow/remote_step/rest.rb +3 -1
  118. data/lib/rbbt/workflow/remote_workflow/remote_step/ssh.rb +14 -5
  119. data/lib/rbbt/workflow/remote_workflow/remote_step.rb +19 -7
  120. data/lib/rbbt/workflow/remote_workflow.rb +6 -1
  121. data/lib/rbbt/workflow/step/run.rb +766 -766
  122. data/lib/rbbt/workflow/step/save_load_inputs.rb +254 -254
  123. data/lib/rbbt/workflow/step.rb +2 -362
  124. data/lib/rbbt/workflow/task.rb +118 -118
  125. data/lib/rbbt/workflow/usage.rb +289 -287
  126. data/lib/rbbt/workflow/util/archive.rb +6 -5
  127. data/lib/rbbt/workflow/util/data.rb +1 -1
  128. data/lib/rbbt/workflow/util/orchestrator.rb +249 -246
  129. data/lib/rbbt/workflow/util/trace.rb +79 -44
  130. data/lib/rbbt/workflow.rb +4 -882
  131. data/lib/rbbt-util.rb +21 -13
  132. data/lib/rbbt.rb +16 -3
  133. data/python/rbbt/__init__.py +96 -4
  134. data/python/rbbt/workflow/remote.py +104 -0
  135. data/python/rbbt/workflow.py +64 -0
  136. data/python/test.py +10 -0
  137. data/share/Rlib/plot.R +37 -37
  138. data/share/Rlib/svg.R +22 -5
  139. data/share/install/software/lib/install_helpers +1 -1
  140. data/share/rbbt_commands/hpc/list +2 -3
  141. data/share/rbbt_commands/hpc/orchestrate +4 -4
  142. data/share/rbbt_commands/hpc/tail +2 -0
  143. data/share/rbbt_commands/hpc/task +10 -7
  144. data/share/rbbt_commands/lsf/list +2 -3
  145. data/share/rbbt_commands/lsf/orchestrate +4 -4
  146. data/share/rbbt_commands/lsf/tail +2 -0
  147. data/share/rbbt_commands/lsf/task +10 -7
  148. data/share/rbbt_commands/migrate +1 -1
  149. data/share/rbbt_commands/pbs/list +2 -3
  150. data/share/rbbt_commands/pbs/orchestrate +4 -4
  151. data/share/rbbt_commands/pbs/tail +2 -0
  152. data/share/rbbt_commands/pbs/task +10 -7
  153. data/share/rbbt_commands/resource/produce +8 -1
  154. data/share/rbbt_commands/slurm/list +2 -3
  155. data/share/rbbt_commands/slurm/orchestrate +4 -4
  156. data/share/rbbt_commands/slurm/tail +2 -0
  157. data/share/rbbt_commands/slurm/task +10 -7
  158. data/share/rbbt_commands/system/clean +5 -5
  159. data/share/rbbt_commands/system/status +5 -5
  160. data/share/rbbt_commands/tsv/get +2 -3
  161. data/share/rbbt_commands/tsv/info +10 -13
  162. data/share/rbbt_commands/tsv/keys +18 -14
  163. data/share/rbbt_commands/tsv/slice +2 -2
  164. data/share/rbbt_commands/tsv/transpose +6 -2
  165. data/share/rbbt_commands/workflow/info +20 -24
  166. data/share/rbbt_commands/workflow/list +1 -1
  167. data/share/rbbt_commands/workflow/prov +20 -13
  168. data/share/rbbt_commands/workflow/retry +43 -0
  169. data/share/rbbt_commands/workflow/server +12 -2
  170. data/share/rbbt_commands/workflow/task +80 -73
  171. data/share/rbbt_commands/workflow/write_info +26 -9
  172. data/share/software/opt/ssw/ssw.c +861 -0
  173. data/share/software/opt/ssw/ssw.h +130 -0
  174. data/share/workflow_config.ru +3 -3
  175. metadata +45 -6
@@ -0,0 +1,20 @@
1
+ require_relative '../../refactor'
2
+ require 'scout/misc'
3
+ require 'scout/open'
4
+ require_relative '../log/refactor'
5
+
6
+ Rbbt.relay_module_method Misc, :sensiblewrite, Open, :sensible_write
7
+ Rbbt.relay_module_method Misc, :file2md5, Misc, :digest_file
8
+ Rbbt.relay_module_method Misc, :lock, Open
9
+ Rbbt.relay_module_method Misc, :consume_stream, Open
10
+ Rbbt.relay_module_method Misc, :sort_stream, Open
11
+ Rbbt.relay_module_method Misc, :sanitize_filename, Path
12
+ Rbbt.relay_module_method Misc, :collapse_stream, Open
13
+ Rbbt.relay_module_method Misc, :open_pipe, Open
14
+ Rbbt.relay_module_method Misc, :pipe, Open
15
+ Rbbt.relay_module_method Misc, :with_fifo, Open
16
+ Rbbt.relay_module_method Misc, :zip2hash, IndiferentHash
17
+ Rbbt.relay_module_method Misc, :obj2md5, Misc, :digest
18
+ Rbbt.relay_module_method Misc, :obj2digest, Misc, :digest
19
+ Rbbt.relay_module_method Misc, :add_defaults, IndiferentHash, :add_defaults
20
+ Rbbt.relay_module_method Misc, :process_options, IndiferentHash, :process_options
@@ -238,27 +238,33 @@ end:
238
238
 
239
239
  end
240
240
 
241
- def self.align(query_sequence, target_sequence)
242
- Log.low { "Aligning #{ Misc.fingerprint query_sequence } to #{ Misc.fingerprint target_sequence }" }
241
+ def self.align_txt(query_sequence, target_sequence)
242
+ Log.low { "Aligning #{ Log.fingerprint query_sequence } to #{ Log.fingerprint target_sequence }" }
243
243
 
244
- begin
245
- raise "No query sequence" if query_sequence.nil?
246
- raise "No target sequence" if target_sequence.nil?
244
+ raise "No query sequence" if query_sequence.nil?
245
+ raise "No target sequence" if target_sequence.nil?
247
246
 
248
- s_out = Misc.open_pipe do |s_in|
249
- SmithWaterman.ssw_aa(query_sequence, target_sequence, query_sequence.length, target_sequence.length, s_in.fileno)
250
- end
247
+ s_out = Open.open_pipe do |s_in|
248
+ SmithWaterman.ssw_aa(query_sequence, target_sequence, query_sequence.length, target_sequence.length, s_in.fileno)
249
+ end
250
+
251
+ txt = s_out.read
252
+ s_out.close
253
+ s_out.join
254
+
255
+ txt
256
+ end
257
+
258
+ def self.align(query_sequence, target_sequence)
259
+ txt = align_txt(query_sequence, target_sequence)
251
260
 
252
- txt = s_out.read
253
- s_out.close
254
- s_out.join
255
- txt
261
+ begin
262
+ ppp txt if Rbbt::Config.get(:print, :ssw_aligmnent, default: false).to_s == 'true'
256
263
 
257
- target_start, target, target_end = txt.match(/Target:\s+(\d+)\s+([A-Z\-?*]+)\s+(\d+)/).values_at 1, 2, 3
264
+ target_start, target, target_end = txt.match(/Target:\s+(\d+)\s+([A-Za-z\-?*]+)\s+(\d+)/).values_at 1, 2, 3
258
265
 
259
- query_start, query, query_end = txt.match(/Query:\s+(\d+)\s+([A-Z\-?*]+)\s+(\d+)/).values_at 1, 2, 3
266
+ query_start, query, query_end = txt.match(/Query:\s+(\d+)\s+([A-Za-z\-?*]+)\s+(\d+)/).values_at 1, 2, 3
260
267
 
261
- txt.replace ""
262
268
  [("_" * (query_start.to_i - 1)) + query, ("_" * (target_start.to_i - 1)) + target]
263
269
  rescue
264
270
  Log.warn("Error in aligmnent: #{$!.message}")
@@ -266,7 +272,7 @@ end:
266
272
  end
267
273
  end
268
274
 
269
- def self.alignment_map(source, target)
275
+ def self.alignment_map(source, target, filter_low_quality=true)
270
276
  alignment_source, alignment_target = SmithWaterman.align(source, target)
271
277
  map = {}
272
278
 
@@ -289,10 +295,14 @@ end:
289
295
  end
290
296
  end
291
297
 
292
- if miss_match + gaps_source > alignment_source.length.to_f / 2
298
+ if filter_low_quality && miss_match + gaps_source > alignment_source.length.to_f / 2
293
299
  {}
294
300
  else
295
301
  map
296
302
  end
297
303
  end
304
+
305
+ def self.alignment_percent(source, target, filter_low_quality=true)
306
+ alignment_map(source, target,filter_low_quality).keys.length.to_f / source.length
307
+ end
298
308
  end
@@ -1,105 +1,92 @@
1
- module Misc
2
-
3
- def self.hostname
4
- @hostanem ||= `hostname`.strip
5
- end
6
-
7
- def self.pid_exists?(pid)
8
- return false if pid.nil?
9
- begin
10
- Process.getpgid(pid.to_i)
11
- true
12
- rescue Errno::ESRCH
13
- false
14
- end
15
- end
16
-
17
- def self.env_add(var, value, sep = ":", prepend = true)
18
- ENV[var] ||= ""
19
- return if ENV[var] =~ /(#{sep}|^)#{Regexp.quote value}(#{sep}|$)/
20
- if prepend
21
- ENV[var] = value + sep + ENV[var]
22
- else
23
- ENV[var] += sep + ENV[var]
24
- end
25
- end
26
-
27
- def self.with_env(var, value, &block)
28
- var = var.to_s
29
- value = value.to_s
30
- current = ENV[var]
31
- begin
32
- ENV[var] = value
33
- yield
34
- ensure
35
- ENV[var] = current
36
- end
37
- end
38
-
39
- def self.path_relative_to(basedir, path)
40
- path = File.expand_path(path) unless path[0] == "/"
41
- basedir = File.expand_path(basedir) unless basedir[0] == "/"
42
-
43
- if path.index(basedir) == 0
44
- if basedir[-1] == "/"
45
- return path[basedir.length..-1]
46
- else
47
- return path[basedir.length+1..-1]
48
- end
49
- else
50
- return nil
51
- end
52
- end
53
-
54
- def self.common_path(dir, file)
55
- file = File.expand_path file
56
- dir = File.expand_path dir
57
-
58
- return true if file == dir
59
- while File.dirname(file) != file
60
- file = File.dirname(file)
61
- return true if file == dir
62
- end
63
-
64
- return false
65
- end
66
-
67
-
68
- def self.relative_link(source, target_dir)
69
- path = "."
70
- current = target_dir
71
- while ! Misc.common_path current, source
72
- current = File.dirname(current)
73
- path = File.join(path, '..')
74
- return nil if current == "/"
75
- end
76
-
77
- File.join(path, Misc.path_relative_to(current, source))
78
- end
79
-
80
- # WARN: probably not thread safe...
81
- def self.in_dir(dir)
82
- old_pwd = FileUtils.pwd
83
- res = nil
84
- begin
85
- FileUtils.mkdir_p dir unless File.exist?(dir)
86
- FileUtils.cd dir
87
- res = yield
88
- ensure
89
- FileUtils.cd old_pwd
90
- end
91
- res
92
- end
93
-
94
- def self.is_filename?(string, need_to_exists = true)
95
- return false if string.nil?
96
- return true if defined? Path and Path === string
97
- return true if string.respond_to? :exists
98
- return true if String === string and ! string.include?("\n") and string.split("/").select{|p| p.length > 265}.empty? and (! need_to_exists || File.exist?(string))
99
- return false
100
- end
101
-
102
- class << self
103
- alias filename? is_filename?
104
- end
105
- end
1
+ require_relative 'refactor'
2
+ Rbbt.require_instead 'scout/misc/format'
3
+ #module Misc
4
+ #
5
+ # def self.hostname
6
+ # @hostanem ||= `hostname`.strip
7
+ # end
8
+ #
9
+ # def self.pid_exists?(pid)
10
+ # return false if pid.nil?
11
+ # begin
12
+ # Process.getpgid(pid.to_i)
13
+ # true
14
+ # rescue Errno::ESRCH
15
+ # false
16
+ # end
17
+ # end
18
+ #
19
+ # def self.env_add(var, value, sep = ":", prepend = true)
20
+ # ENV[var] ||= ""
21
+ # return if ENV[var] =~ /(#{sep}|^)#{Regexp.quote value}(#{sep}|$)/
22
+ # if prepend
23
+ # ENV[var] = value + sep + ENV[var]
24
+ # else
25
+ # ENV[var] += sep + ENV[var]
26
+ # end
27
+ # end
28
+ #
29
+ # def self.with_env(var, value, &block)
30
+ # var = var.to_s
31
+ # value = value.to_s
32
+ # current = ENV[var]
33
+ # begin
34
+ # ENV[var] = value
35
+ # yield
36
+ # ensure
37
+ # ENV[var] = current
38
+ # end
39
+ # end
40
+ #
41
+ # def self.common_path(dir, file)
42
+ # file = File.expand_path file
43
+ # dir = File.expand_path dir
44
+ #
45
+ # return true if file == dir
46
+ # while File.dirname(file) != file
47
+ # file = File.dirname(file)
48
+ # return true if file == dir
49
+ # end
50
+ #
51
+ # return false
52
+ # end
53
+ #
54
+ #
55
+ # def self.relative_link(source, target_dir)
56
+ # path = "."
57
+ # current = target_dir
58
+ # while ! Misc.common_path current, source
59
+ # current = File.dirname(current)
60
+ # path = File.join(path, '..')
61
+ # return nil if current == "/"
62
+ # end
63
+ #
64
+ # File.join(path, Misc.path_relative_to(current, source))
65
+ # end
66
+ #
67
+ # # WARN: probably not thread safe...
68
+ # def self.in_dir(dir)
69
+ # old_pwd = FileUtils.pwd
70
+ # res = nil
71
+ # begin
72
+ # FileUtils.mkdir_p dir unless File.exist?(dir)
73
+ # FileUtils.cd dir
74
+ # res = yield
75
+ # ensure
76
+ # FileUtils.cd old_pwd
77
+ # end
78
+ # res
79
+ # end
80
+ #
81
+ # def self.is_filename?(string, need_to_exists = true)
82
+ # return false if string.nil?
83
+ # return true if defined? Path and Path === string
84
+ # return true if string.respond_to? :exists
85
+ # return true if String === string and ! string.include?("\n") and string.split("/").select{|p| p.length > 265}.empty? and (! need_to_exists || File.exist?(string))
86
+ # return false
87
+ # end
88
+ #
89
+ # class << self
90
+ # alias filename? is_filename?
91
+ # end
92
+ #end
@@ -1,28 +1,28 @@
1
- require 'lockfile'
1
+ require_relative '../../rbbt'
2
2
  require 'digest/md5'
3
3
  require 'cgi'
4
4
  require 'zlib'
5
5
  require 'etc'
6
6
  require 'rubygems/package'
7
7
 
8
- require 'rbbt/util/tar'
9
- require 'rbbt/util/misc/exceptions'
10
- require 'rbbt/util/misc/concurrent_stream'
11
- require 'rbbt/util/misc/indiferent_hash'
12
- require 'rbbt/util/misc/pipes'
13
- require 'rbbt/util/misc/format'
14
- require 'rbbt/util/misc/omics'
15
- require 'rbbt/util/misc/inspect'
16
- require 'rbbt/util/misc/math'
17
- require 'rbbt/util/misc/development'
18
- require 'rbbt/util/misc/lock'
19
- require 'rbbt/util/misc/options'
20
- require 'rbbt/util/misc/system'
21
- require 'rbbt/util/misc/objects'
22
- require 'rbbt/util/misc/manipulation'
23
- require 'rbbt/util/misc/communication'
24
-
25
- require 'rbbt/util/misc/serialize'
8
+ require_relative 'tar'
9
+ require_relative 'misc/exceptions'
10
+ require_relative 'misc/concurrent_stream'
11
+ require_relative 'misc/indiferent_hash'
12
+ require_relative 'misc/pipes'
13
+ require_relative 'misc/format'
14
+ require_relative 'misc/omics'
15
+ require_relative 'misc/inspect'
16
+ #require_relative 'misc/math'
17
+ require_relative 'misc/development'
18
+ require_relative 'misc/lock'
19
+ require_relative 'misc/options'
20
+ require_relative 'misc/system'
21
+ require_relative 'misc/objects'
22
+ require_relative 'misc/manipulation'
23
+ require_relative 'misc/communication'
24
+
25
+ require_relative 'misc/serialize'
26
26
 
27
27
  require 'to_regexp'
28
28
 
@@ -104,7 +104,26 @@ module Misc
104
104
  time
105
105
  end
106
106
 
107
-
107
+ def self.name2basename(file)
108
+ sanitize_filename(file.gsub("/",'·').gsub("~", '-'))
109
+ end
110
+
111
+ def self.sanitize_filename(filename, length = 254)
112
+ if filename.length > length
113
+ if filename =~ /(\..{2,9})$/
114
+ extension = $1
115
+ else
116
+ extension = ''
117
+ end
118
+
119
+ post_fix = "--#{filename.length}@#{length}_#{Misc.digest(filename)[0..4]}" + extension
120
+
121
+ filename = filename[0..(length - post_fix.length - 1)] << post_fix
122
+ else
123
+ filename
124
+ end
125
+ filename
126
+ end
108
127
  end
109
128
 
110
129
  module PDF2Text
@@ -0,0 +1,4 @@
1
+ require 'scout/named_array'
2
+ module NamedArray
3
+ annotation :entity_options, :entity_templates
4
+ end
@@ -1,220 +1,3 @@
1
- require 'rbbt/util/misc'
2
-
3
- module NamedArray
4
- #extend ChainMethods
5
- #self.chain_prefix = :named_array
6
-
7
- attr_accessor :fields
8
- attr_accessor :key
9
- attr_accessor :entity_options
10
- attr_accessor :entity_templates
11
-
12
- def shift
13
- fields.shift
14
- super
15
- end
16
-
17
- def entity_templates
18
- @entity_templates ||= {}
19
- end
20
-
21
- def self.setup(array, fields, key = nil, entity_options = nil, entity_templates = nil)
22
- return array if array.nil?
23
- array.extend NamedArray unless NamedArray === array
24
- array.fields = Annotated === fields ? Annotated.purge(fields) : fields
25
- array.key = key
26
- array.entity_options = entity_options unless entity_options.nil?
27
- array.entity_templates = entity_templates unless entity_templates.nil?
28
- array
29
- end
30
-
31
- def prepare_entity(entity, field, options = {})
32
- return entity if entity.nil?
33
- return entity unless defined? Entity
34
- template = entity_templates[field]
35
- entity_templates ||= {}
36
- if template and template.respond_to?(:annotate)
37
- begin entity = entity.dup if entity.frozen?; rescue; end
38
- entity = template.annotate(entity)
39
- entity.extend AnnotatedArray if Array === entity
40
- entity
41
- else
42
- if entity_templates.include? field
43
- entity
44
- else
45
- template = Misc.prepare_entity("ENTITY_TEMPLATE", field, options)
46
- if template.respond_to?(:annotate)
47
- entity_templates[field] = template
48
- begin entity = entity.dup if entity.frozen?; rescue; end
49
- entity = template.annotate(entity)
50
- entity.extend AnnotatedArray if Array === entity
51
- entity
52
- else
53
- entity_templates[field] = nil
54
- entity
55
- end
56
- end
57
- end
58
- end
59
-
60
- def merge(array)
61
- double = Array === array.first
62
- new = self.dup
63
- (0..length - 1).each do |i|
64
- if double
65
- new[i] = new[i] + array[i]
66
- else
67
- new[i] << array[i]
68
- end
69
- end
70
- new
71
- end
72
-
73
- def positions(fields)
74
- if Array == fields
75
- fields.collect{|field|
76
- Misc.field_position(@fields, field)
77
- }
78
- else
79
- Misc.field_position(@fields, fields)
80
- end
81
- end
82
-
83
- #def named_array_get_brackets(key)
84
- # pos = Misc.field_position(fields, key)
85
- # elem = named_array_clean_get_brackets(pos)
86
-
87
- # return elem if @fields.nil? or @fields.empty?
88
-
89
- # field = NamedArray === @fields ? @fields.named_array_clean_get_brackets(pos) : @fields[pos]
90
- # elem = prepare_entity(elem, field, entity_options)
91
- # elem
92
- #end
93
-
94
- def [](key, clean = false)
95
- pos = Misc.field_position(fields, key)
96
- elem = super(pos)
97
- return elem if clean
98
-
99
- return elem if @fields.nil? or @fields.empty?
100
-
101
- field = NamedArray === @fields ? @fields[pos, true] : @fields[pos]
102
- elem = prepare_entity(elem, field, entity_options)
103
- elem
104
- end
105
-
106
- #def named_array_each(&block)
107
- # if defined?(Entity) and not @fields.nil? and not @fields.empty?
108
- # @fields.zip(self).each do |field,elem|
109
- # elem = prepare_entity(elem, field, entity_options)
110
- # yield(elem)
111
- # elem
112
- # end
113
- # else
114
- # named_array_clean_each &block
115
- # end
116
- #end
117
-
118
- def each(&block)
119
- if defined?(Entity) && ! (@fields.nil? || @fields.empty?)
120
- i = 0
121
- super do |elem|
122
- field = @fields[i]
123
- elem = prepare_entity(elem, field, entity_options)
124
- yield(elem)
125
- i += 1
126
- elem
127
- end
128
- else
129
- super &block
130
- end
131
-
132
- end
133
-
134
- #def named_array_collect
135
- # res = []
136
-
137
- # each do |elem|
138
- # if block_given?
139
- # res << yield(elem)
140
- # else
141
- # res << elem
142
- # end
143
- # end
144
-
145
- # res
146
- #end
147
-
148
-
149
- def collect
150
- res = []
151
-
152
- each do |elem|
153
- if block_given?
154
- res << yield(elem)
155
- else
156
- res << elem
157
- end
158
- end
159
-
160
- res
161
- end
162
-
163
- #def named_array_set_brackets(key,value)
164
- # named_array_clean_set_brackets(Misc.field_position(fields, key), value)
165
- #end
166
-
167
- def []=(key, value)
168
- super(Misc.field_position(fields, key), value)
169
- end
170
-
171
- #def named_array_values_at(*keys)
172
- # keys = keys.collect{|k| Misc.field_position(fields, k, true) }
173
- # keys.collect{|k|
174
- # named_array_get_brackets(k) unless k.nil?
175
- # }
176
- #end
177
-
178
-
179
-
180
- def values_at(*keys)
181
- keys = keys.collect{|k| Misc.field_position(fields, k, true) }
182
- keys.collect{|k|
183
- self[k] unless k.nil?
184
- }
185
- end
186
-
187
- def zip_fields
188
- return [] if self.empty?
189
- zipped = Misc.zip_fields(self)
190
- zipped = zipped.collect{|v| NamedArray.setup(v, fields)}
191
- zipped
192
- end
193
-
194
- def detach(file)
195
- file_fields = file.fields.collect{|field| field.fullname}
196
- detached_fields = []
197
- self.fields.each_with_index{|field,i| detached_fields << i if file_fields.include? field.fullname}
198
- fields = self.fields.values_at *detached_fields
199
- values = self.values_at *detached_fields
200
- values = NamedArray.name(values, fields)
201
- values.zip_fields
202
- end
203
-
204
- def report
205
- fields.zip(self).collect do |field,value|
206
- "#{Log.color(:magenta, field) }: #{ Array === value ? value * "|" : value }"
207
- end * "\n"
208
- end
209
-
210
- def to_hash
211
- hash = {}
212
- self.fields.zip(self) do |field,value|
213
- hash[field] = value
214
- end
215
- IndiferentHash.setup hash
216
- hash
217
- end
218
-
219
- end
220
-
1
+ require_relative '../refactor'
2
+ Rbbt.require_instead 'scout/named_array'
3
+ require_relative 'named_array/refactor'
@@ -0,0 +1,7 @@
1
+ require 'rbbt/util/misc/bgzf'
2
+ module Open
3
+ class << self
4
+ attr_accessor :repository_dirs
5
+ end
6
+ self.repository_dirs = []
7
+ end