imw 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (111) hide show
  1. data/.gitignore +15 -0
  2. data/CHANGELOG +0 -0
  3. data/LICENSE +674 -0
  4. data/README.rdoc +101 -0
  5. data/Rakefile +20 -0
  6. data/VERSION +1 -0
  7. data/etc/imwrc.rb +76 -0
  8. data/lib/imw.rb +42 -0
  9. data/lib/imw/boot.rb +58 -0
  10. data/lib/imw/dataset.rb +233 -0
  11. data/lib/imw/dataset/datamapper.rb +66 -0
  12. data/lib/imw/dataset/datamapper/time_and_user_stamps.rb +37 -0
  13. data/lib/imw/dataset/loaddump.rb +50 -0
  14. data/lib/imw/dataset/old/file_collection.rb +88 -0
  15. data/lib/imw/dataset/old/file_collection_utils.rb +71 -0
  16. data/lib/imw/dataset/scaffold.rb +132 -0
  17. data/lib/imw/dataset/scraped_uri.rb +305 -0
  18. data/lib/imw/dataset/scrub/old_working_scrubber.rb +87 -0
  19. data/lib/imw/dataset/scrub/scrub.rb +147 -0
  20. data/lib/imw/dataset/scrub/scrub_simple_url.rb +38 -0
  21. data/lib/imw/dataset/scrub/scrub_test.rb +60 -0
  22. data/lib/imw/dataset/scrub/slug.rb +101 -0
  23. data/lib/imw/dataset/stats.rb +73 -0
  24. data/lib/imw/dataset/stats/counter.rb +23 -0
  25. data/lib/imw/dataset/task.rb +38 -0
  26. data/lib/imw/dataset/workflow.rb +81 -0
  27. data/lib/imw/files.rb +110 -0
  28. data/lib/imw/files/archive.rb +113 -0
  29. data/lib/imw/files/basicfile.rb +122 -0
  30. data/lib/imw/files/binary.rb +28 -0
  31. data/lib/imw/files/compressed_file.rb +93 -0
  32. data/lib/imw/files/compressed_files_and_archives.rb +348 -0
  33. data/lib/imw/files/compressible.rb +103 -0
  34. data/lib/imw/files/csv.rb +112 -0
  35. data/lib/imw/files/json.rb +41 -0
  36. data/lib/imw/files/sgml.rb +65 -0
  37. data/lib/imw/files/text.rb +68 -0
  38. data/lib/imw/files/yaml.rb +46 -0
  39. data/lib/imw/packagers.rb +8 -0
  40. data/lib/imw/packagers/archiver.rb +108 -0
  41. data/lib/imw/packagers/s3_mover.rb +28 -0
  42. data/lib/imw/parsers.rb +7 -0
  43. data/lib/imw/parsers/html_parser.rb +382 -0
  44. data/lib/imw/parsers/html_parser/matchers.rb +306 -0
  45. data/lib/imw/parsers/line_parser.rb +87 -0
  46. data/lib/imw/parsers/regexp_parser.rb +72 -0
  47. data/lib/imw/utils.rb +24 -0
  48. data/lib/imw/utils/components.rb +61 -0
  49. data/lib/imw/utils/config.rb +46 -0
  50. data/lib/imw/utils/error.rb +54 -0
  51. data/lib/imw/utils/extensions/array.rb +125 -0
  52. data/lib/imw/utils/extensions/class/attribute_accessors.rb +8 -0
  53. data/lib/imw/utils/extensions/core.rb +43 -0
  54. data/lib/imw/utils/extensions/dir.rb +24 -0
  55. data/lib/imw/utils/extensions/file_core.rb +64 -0
  56. data/lib/imw/utils/extensions/hash.rb +218 -0
  57. data/lib/imw/utils/extensions/hpricot.rb +48 -0
  58. data/lib/imw/utils/extensions/string.rb +49 -0
  59. data/lib/imw/utils/extensions/struct.rb +42 -0
  60. data/lib/imw/utils/extensions/symbol.rb +28 -0
  61. data/lib/imw/utils/extensions/typed_struct.rb +22 -0
  62. data/lib/imw/utils/extensions/uri.rb +59 -0
  63. data/lib/imw/utils/log.rb +67 -0
  64. data/lib/imw/utils/misc.rb +63 -0
  65. data/lib/imw/utils/paths.rb +115 -0
  66. data/lib/imw/utils/uri.rb +59 -0
  67. data/lib/imw/utils/uuid.rb +33 -0
  68. data/lib/imw/utils/validate.rb +38 -0
  69. data/lib/imw/utils/version.rb +12 -0
  70. data/lib/imw/utils/view.rb +113 -0
  71. data/lib/imw/utils/view/dump_csv.rb +112 -0
  72. data/lib/imw/utils/view/dump_csv_older.rb +117 -0
  73. data/spec/data/sample.csv +131 -0
  74. data/spec/data/sample.tsv +131 -0
  75. data/spec/data/sample.txt +131 -0
  76. data/spec/data/sample.xml +653 -0
  77. data/spec/data/sample.yaml +652 -0
  78. data/spec/imw/dataset/datamapper/uri_spec.rb +43 -0
  79. data/spec/imw/dataset/datamapper_spec_helper.rb +11 -0
  80. data/spec/imw/files/archive_spec.rb +118 -0
  81. data/spec/imw/files/basicfile_spec.rb +121 -0
  82. data/spec/imw/files/bz2_spec.rb +32 -0
  83. data/spec/imw/files/compressed_file_spec.rb +96 -0
  84. data/spec/imw/files/compressible_spec.rb +100 -0
  85. data/spec/imw/files/file_spec.rb +144 -0
  86. data/spec/imw/files/gz_spec.rb +32 -0
  87. data/spec/imw/files/rar_spec.rb +33 -0
  88. data/spec/imw/files/tar_spec.rb +31 -0
  89. data/spec/imw/files/text_spec.rb +23 -0
  90. data/spec/imw/files/zip_spec.rb +31 -0
  91. data/spec/imw/files_spec.rb +38 -0
  92. data/spec/imw/packagers/archiver_spec.rb +125 -0
  93. data/spec/imw/packagers/s3_mover_spec.rb +7 -0
  94. data/spec/imw/parsers/line_parser_spec.rb +96 -0
  95. data/spec/imw/parsers/regexp_parser_spec.rb +42 -0
  96. data/spec/imw/utils/extensions/file_core_spec.rb +72 -0
  97. data/spec/imw/utils/extensions/find_spec.rb +113 -0
  98. data/spec/imw/utils/paths_spec.rb +38 -0
  99. data/spec/imw/workflow/rip/local_spec.rb +89 -0
  100. data/spec/imw/workflow/rip_spec.rb +27 -0
  101. data/spec/rcov.opts +1 -0
  102. data/spec/spec.opts +4 -0
  103. data/spec/spec_helper.rb +32 -0
  104. data/spec/support/archive_contents_matcher.rb +94 -0
  105. data/spec/support/custom_matchers.rb +21 -0
  106. data/spec/support/directory_contents_matcher.rb +61 -0
  107. data/spec/support/extensions.rb +18 -0
  108. data/spec/support/file_contents_matcher.rb +50 -0
  109. data/spec/support/random.rb +210 -0
  110. data/spec/support/without_regard_to_order_matcher.rb +58 -0
  111. metadata +196 -0
@@ -0,0 +1,58 @@
1
+ #
2
+ # h2. spec/support/without_regard_to_order_matcher.rb -- set matcher for non-sets
3
+ #
4
+ # == About
5
+ #
6
+ # A simple matcher which compares two objects as though they were
7
+ # sets, i.e. without regard to the order of their elements.
8
+ #
9
+ # Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
10
+ # Copyright:: Copyright (c) 2008 infochimps.org
11
+ # License:: GPL 3.0
12
+ # Website:: http://infinitemonkeywrench.org/
13
+ #
14
+
15
+ require 'set'
16
+ require 'imw/utils'
17
+
18
+ module Spec
19
+ module Matchers
20
+ module IMW
21
+
22
+ # Match the contents of two arrays without regard to the order
23
+ # of their elements by treating each as a set.
24
+ class WithoutRegardToOrder
25
+
26
+ private
27
+ def initialize known_array
28
+ @known_array = known_array.to_set
29
+ end
30
+
31
+ public
32
+ def matches? array_to_test
33
+ @array_to_test = array_to_test.to_set
34
+ @array_to_test == @known_array
35
+ end
36
+
37
+ def failure_message
38
+ missing_from_array_to_test = "missing from array to test: #{(@known_array - @array_to_test).to_a.quote_items_with "and"}\n"
39
+ missing_from_known_array = "missing from known array: #{(@array_to_test - @known_array).to_a.quote_items_with "and"}\n"
40
+ common_to_both = "common to both: #{(@array_to_test & @known_array).to_a.quote_items_with "and"}\n"
41
+ "expected contents of the arrays to be identical:\n\n#{missing_from_array_to_test}\n#{missing_from_known_array}\n#{common_to_both}"
42
+ end
43
+
44
+ def negative_failure_message
45
+ "expected contents of the arrays to differ."
46
+ end
47
+ end
48
+
49
+ # Check that the contents of one array match another without
50
+ # regard to ordering.
51
+ def match_without_regard_to_order known_array
52
+ WithoutRegardToOrder.new(known_array)
53
+ end
54
+ end
55
+ end
56
+ end
57
+
58
+ # puts "#{File.basename(__FILE__)}: The leg bone's connected to the...knee bone, the knee bone's connected...wait, isn't it the other way 'round?" # at bottom
metadata ADDED
@@ -0,0 +1,196 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: imw
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Dhruv Bansal
8
+ - Philip (flip) Kromer
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2009-12-20 00:00:00 -06:00
14
+ default_executable:
15
+ dependencies: []
16
+
17
+ description: The Infinite Monkeywrench (IMW) is a Ruby framework to simplify the tasks of acquiring, extracting, transforming, loading, and packaging data. It minimizes programmer time by encapsulating common data workflows and patterns and creating interfaces to many other useful Ruby libraries.
18
+ email: coders@infochimps.org
19
+ executables: []
20
+
21
+ extensions: []
22
+
23
+ extra_rdoc_files:
24
+ - LICENSE
25
+ - README.rdoc
26
+ files:
27
+ - .gitignore
28
+ - CHANGELOG
29
+ - LICENSE
30
+ - README.rdoc
31
+ - Rakefile
32
+ - VERSION
33
+ - etc/imwrc.rb
34
+ - lib/imw.rb
35
+ - lib/imw/boot.rb
36
+ - lib/imw/dataset.rb
37
+ - lib/imw/dataset/datamapper.rb
38
+ - lib/imw/dataset/datamapper/time_and_user_stamps.rb
39
+ - lib/imw/dataset/loaddump.rb
40
+ - lib/imw/dataset/old/file_collection.rb
41
+ - lib/imw/dataset/old/file_collection_utils.rb
42
+ - lib/imw/dataset/scaffold.rb
43
+ - lib/imw/dataset/scraped_uri.rb
44
+ - lib/imw/dataset/scrub/old_working_scrubber.rb
45
+ - lib/imw/dataset/scrub/scrub.rb
46
+ - lib/imw/dataset/scrub/scrub_simple_url.rb
47
+ - lib/imw/dataset/scrub/scrub_test.rb
48
+ - lib/imw/dataset/scrub/slug.rb
49
+ - lib/imw/dataset/stats.rb
50
+ - lib/imw/dataset/stats/counter.rb
51
+ - lib/imw/dataset/task.rb
52
+ - lib/imw/dataset/workflow.rb
53
+ - lib/imw/files.rb
54
+ - lib/imw/files/archive.rb
55
+ - lib/imw/files/basicfile.rb
56
+ - lib/imw/files/binary.rb
57
+ - lib/imw/files/compressed_file.rb
58
+ - lib/imw/files/compressed_files_and_archives.rb
59
+ - lib/imw/files/compressible.rb
60
+ - lib/imw/files/csv.rb
61
+ - lib/imw/files/json.rb
62
+ - lib/imw/files/sgml.rb
63
+ - lib/imw/files/text.rb
64
+ - lib/imw/files/yaml.rb
65
+ - lib/imw/packagers.rb
66
+ - lib/imw/packagers/archiver.rb
67
+ - lib/imw/packagers/s3_mover.rb
68
+ - lib/imw/parsers.rb
69
+ - lib/imw/parsers/html_parser.rb
70
+ - lib/imw/parsers/html_parser/matchers.rb
71
+ - lib/imw/parsers/line_parser.rb
72
+ - lib/imw/parsers/regexp_parser.rb
73
+ - lib/imw/utils.rb
74
+ - lib/imw/utils/components.rb
75
+ - lib/imw/utils/config.rb
76
+ - lib/imw/utils/error.rb
77
+ - lib/imw/utils/extensions/array.rb
78
+ - lib/imw/utils/extensions/class/attribute_accessors.rb
79
+ - lib/imw/utils/extensions/core.rb
80
+ - lib/imw/utils/extensions/dir.rb
81
+ - lib/imw/utils/extensions/file_core.rb
82
+ - lib/imw/utils/extensions/hash.rb
83
+ - lib/imw/utils/extensions/hpricot.rb
84
+ - lib/imw/utils/extensions/string.rb
85
+ - lib/imw/utils/extensions/struct.rb
86
+ - lib/imw/utils/extensions/symbol.rb
87
+ - lib/imw/utils/extensions/typed_struct.rb
88
+ - lib/imw/utils/extensions/uri.rb
89
+ - lib/imw/utils/log.rb
90
+ - lib/imw/utils/misc.rb
91
+ - lib/imw/utils/paths.rb
92
+ - lib/imw/utils/uri.rb
93
+ - lib/imw/utils/uuid.rb
94
+ - lib/imw/utils/validate.rb
95
+ - lib/imw/utils/version.rb
96
+ - lib/imw/utils/view.rb
97
+ - lib/imw/utils/view/dump_csv.rb
98
+ - lib/imw/utils/view/dump_csv_older.rb
99
+ - spec/data/sample.csv
100
+ - spec/data/sample.tsv
101
+ - spec/data/sample.txt
102
+ - spec/data/sample.xml
103
+ - spec/data/sample.yaml
104
+ - spec/imw/dataset/datamapper/uri_spec.rb
105
+ - spec/imw/dataset/datamapper_spec_helper.rb
106
+ - spec/imw/files/archive_spec.rb
107
+ - spec/imw/files/basicfile_spec.rb
108
+ - spec/imw/files/bz2_spec.rb
109
+ - spec/imw/files/compressed_file_spec.rb
110
+ - spec/imw/files/compressible_spec.rb
111
+ - spec/imw/files/file_spec.rb
112
+ - spec/imw/files/gz_spec.rb
113
+ - spec/imw/files/rar_spec.rb
114
+ - spec/imw/files/tar_spec.rb
115
+ - spec/imw/files/text_spec.rb
116
+ - spec/imw/files/zip_spec.rb
117
+ - spec/imw/files_spec.rb
118
+ - spec/imw/packagers/archiver_spec.rb
119
+ - spec/imw/packagers/s3_mover_spec.rb
120
+ - spec/imw/parsers/line_parser_spec.rb
121
+ - spec/imw/parsers/regexp_parser_spec.rb
122
+ - spec/imw/utils/extensions/file_core_spec.rb
123
+ - spec/imw/utils/extensions/find_spec.rb
124
+ - spec/imw/utils/paths_spec.rb
125
+ - spec/imw/workflow/rip/local_spec.rb
126
+ - spec/imw/workflow/rip_spec.rb
127
+ - spec/rcov.opts
128
+ - spec/spec.opts
129
+ - spec/spec_helper.rb
130
+ - spec/support/archive_contents_matcher.rb
131
+ - spec/support/custom_matchers.rb
132
+ - spec/support/directory_contents_matcher.rb
133
+ - spec/support/extensions.rb
134
+ - spec/support/file_contents_matcher.rb
135
+ - spec/support/random.rb
136
+ - spec/support/without_regard_to_order_matcher.rb
137
+ has_rdoc: true
138
+ homepage: http://github.com/infochimps/imw
139
+ licenses: []
140
+
141
+ post_install_message:
142
+ rdoc_options:
143
+ - --charset=UTF-8
144
+ require_paths:
145
+ - lib
146
+ required_ruby_version: !ruby/object:Gem::Requirement
147
+ requirements:
148
+ - - ">="
149
+ - !ruby/object:Gem::Version
150
+ version: "0"
151
+ version:
152
+ required_rubygems_version: !ruby/object:Gem::Requirement
153
+ requirements:
154
+ - - ">="
155
+ - !ruby/object:Gem::Version
156
+ version: "0"
157
+ version:
158
+ requirements: []
159
+
160
+ rubyforge_project:
161
+ rubygems_version: 1.3.5
162
+ signing_key:
163
+ specification_version: 3
164
+ summary: The Infinite Monkeywrench (IMW) makes acquiring, extracting, transforming, loading, and packaging data easy.
165
+ test_files:
166
+ - spec/imw/packagers/archiver_spec.rb
167
+ - spec/imw/packagers/s3_mover_spec.rb
168
+ - spec/imw/workflow/rip/local_spec.rb
169
+ - spec/imw/workflow/rip_spec.rb
170
+ - spec/imw/dataset/datamapper_spec_helper.rb
171
+ - spec/imw/dataset/datamapper/uri_spec.rb
172
+ - spec/imw/parsers/line_parser_spec.rb
173
+ - spec/imw/parsers/regexp_parser_spec.rb
174
+ - spec/imw/files/compressed_file_spec.rb
175
+ - spec/imw/files/basicfile_spec.rb
176
+ - spec/imw/files/file_spec.rb
177
+ - spec/imw/files/archive_spec.rb
178
+ - spec/imw/files/compressible_spec.rb
179
+ - spec/imw/files/tar_spec.rb
180
+ - spec/imw/files/zip_spec.rb
181
+ - spec/imw/files/text_spec.rb
182
+ - spec/imw/files/bz2_spec.rb
183
+ - spec/imw/files/rar_spec.rb
184
+ - spec/imw/files/gz_spec.rb
185
+ - spec/imw/files_spec.rb
186
+ - spec/imw/utils/paths_spec.rb
187
+ - spec/imw/utils/extensions/find_spec.rb
188
+ - spec/imw/utils/extensions/file_core_spec.rb
189
+ - spec/spec_helper.rb
190
+ - spec/support/without_regard_to_order_matcher.rb
191
+ - spec/support/extensions.rb
192
+ - spec/support/archive_contents_matcher.rb
193
+ - spec/support/custom_matchers.rb
194
+ - spec/support/random.rb
195
+ - spec/support/file_contents_matcher.rb
196
+ - spec/support/directory_contents_matcher.rb