alf 0.9.3 → 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (270) hide show
  1. data/CHANGELOG.md +255 -129
  2. data/Gemfile +31 -1
  3. data/Gemfile.lock +17 -20
  4. data/LICENCE.md +1 -1
  5. data/Manifest.txt +2 -0
  6. data/README.md +37 -43
  7. data/TODO.md +1 -1
  8. data/alf.gemspec +10 -7
  9. data/alf.noespec +24 -13
  10. data/bin/alf +2 -2
  11. data/doc/commands/exec.md +16 -0
  12. data/doc/commands/help.md +11 -0
  13. data/doc/commands/main.md +33 -0
  14. data/doc/commands/show.md +19 -0
  15. data/doc/operators/non_relational/autonum.md +23 -0
  16. data/doc/operators/non_relational/clip.md +31 -0
  17. data/doc/operators/non_relational/coerce.md +15 -0
  18. data/doc/operators/non_relational/compact.md +20 -0
  19. data/doc/operators/non_relational/defaults.md +32 -0
  20. data/doc/operators/non_relational/generator.md +20 -0
  21. data/doc/operators/non_relational/sort.md +24 -0
  22. data/doc/operators/relational/extend.md +18 -0
  23. data/doc/operators/relational/group.md +27 -0
  24. data/doc/operators/relational/intersect.md +13 -0
  25. data/doc/operators/relational/join.md +27 -0
  26. data/doc/operators/relational/matching.md +20 -0
  27. data/doc/operators/relational/minus.md +12 -0
  28. data/doc/operators/relational/not-matching.md +20 -0
  29. data/doc/operators/relational/project.md +28 -0
  30. data/doc/operators/relational/quota.md +21 -0
  31. data/doc/operators/relational/rank.md +27 -0
  32. data/doc/operators/relational/rename.md +17 -0
  33. data/doc/operators/relational/restrict.md +25 -0
  34. data/doc/operators/relational/summarize.md +25 -0
  35. data/doc/operators/relational/ungroup.md +20 -0
  36. data/doc/operators/relational/union.md +14 -0
  37. data/doc/operators/relational/unwrap.md +20 -0
  38. data/doc/operators/relational/wrap.md +24 -0
  39. data/examples/csv/suppliers.csv +6 -0
  40. data/examples/logs/access.log +1000 -0
  41. data/examples/logs/combined.alf +2 -0
  42. data/examples/logs/hits.alf +14 -0
  43. data/examples/logs/not_found.alf +7 -0
  44. data/examples/logs/robots-cheating.alf +11 -0
  45. data/examples/logs/robots.alf +8 -0
  46. data/examples/northwind/customers.csv +92 -0
  47. data/examples/northwind/northwind.db +0 -0
  48. data/examples/northwind/orders.csv +831 -0
  49. data/examples/operators/clip.alf +1 -1
  50. data/examples/operators/database.alf +5 -6
  51. data/examples/operators/defaults.alf +1 -1
  52. data/examples/operators/group.alf +1 -1
  53. data/examples/operators/project.alf +2 -1
  54. data/examples/operators/pseudo-with.alf +2 -2
  55. data/examples/operators/quota.alf +2 -2
  56. data/examples/operators/summarize.alf +2 -2
  57. data/lib/alf/aggregator/aggregators.rb +77 -0
  58. data/lib/alf/aggregator/base.rb +95 -0
  59. data/lib/alf/aggregator/class_methods.rb +57 -0
  60. data/lib/alf/buffer/sorted.rb +48 -0
  61. data/lib/alf/command/class_methods.rb +27 -0
  62. data/lib/alf/command/doc_manager.rb +72 -0
  63. data/lib/alf/command/exec.rb +12 -0
  64. data/lib/alf/command/help.rb +31 -0
  65. data/lib/alf/command/main.rb +146 -0
  66. data/lib/alf/command/show.rb +33 -0
  67. data/lib/alf/environment/base.rb +37 -0
  68. data/lib/alf/environment/class_methods.rb +93 -0
  69. data/lib/alf/environment/explicit.rb +38 -0
  70. data/lib/alf/environment/folder.rb +62 -0
  71. data/lib/alf/extra/csv.rb +104 -0
  72. data/lib/alf/extra/logs.rb +100 -0
  73. data/lib/alf/extra/sequel.rb +77 -0
  74. data/lib/alf/{yaml.rb → extra/yaml.rb} +0 -0
  75. data/lib/alf/extra.rb +5 -0
  76. data/lib/alf/iterator/base.rb +38 -0
  77. data/lib/alf/iterator/class_methods.rb +22 -0
  78. data/lib/alf/iterator/proxy.rb +33 -0
  79. data/lib/alf/lispy/instance_methods.rb +157 -0
  80. data/lib/alf/operator/base.rb +74 -0
  81. data/lib/alf/operator/binary.rb +32 -0
  82. data/lib/alf/operator/cesure.rb +45 -0
  83. data/lib/alf/operator/class_methods.rb +132 -0
  84. data/lib/alf/operator/experimental.rb +9 -0
  85. data/lib/alf/operator/non_relational/autonum.rb +24 -0
  86. data/lib/alf/operator/non_relational/clip.rb +20 -0
  87. data/lib/alf/operator/non_relational/coerce.rb +21 -0
  88. data/lib/alf/operator/non_relational/compact.rb +62 -0
  89. data/lib/alf/operator/non_relational/defaults.rb +25 -0
  90. data/lib/alf/operator/non_relational/generator.rb +38 -0
  91. data/lib/alf/operator/non_relational/sort.rb +23 -0
  92. data/lib/alf/operator/nullary.rb +20 -0
  93. data/lib/alf/operator/relational/extend.rb +24 -0
  94. data/lib/alf/operator/relational/group.rb +32 -0
  95. data/lib/alf/operator/relational/intersect.rb +37 -0
  96. data/lib/alf/operator/relational/join.rb +106 -0
  97. data/lib/alf/operator/relational/matching.rb +45 -0
  98. data/lib/alf/operator/relational/minus.rb +37 -0
  99. data/lib/alf/operator/relational/not_matching.rb +45 -0
  100. data/lib/alf/operator/relational/project.rb +22 -0
  101. data/lib/alf/operator/relational/quota.rb +51 -0
  102. data/lib/alf/operator/relational/rank.rb +55 -0
  103. data/lib/alf/operator/relational/rename.rb +19 -0
  104. data/lib/alf/operator/relational/restrict.rb +20 -0
  105. data/lib/alf/operator/relational/summarize.rb +83 -0
  106. data/lib/alf/operator/relational/ungroup.rb +25 -0
  107. data/lib/alf/operator/relational/union.rb +32 -0
  108. data/lib/alf/operator/relational/unwrap.rb +21 -0
  109. data/lib/alf/operator/relational/wrap.rb +22 -0
  110. data/lib/alf/operator/shortcut.rb +53 -0
  111. data/lib/alf/operator/signature.rb +262 -0
  112. data/lib/alf/operator/transform.rb +27 -0
  113. data/lib/alf/operator/unary.rb +38 -0
  114. data/lib/alf/reader/alf_file.rb +24 -0
  115. data/lib/alf/reader/base.rb +119 -0
  116. data/lib/alf/reader/class_methods.rb +82 -0
  117. data/lib/alf/reader/rash.rb +28 -0
  118. data/lib/alf/relation/class_methods.rb +37 -0
  119. data/lib/alf/relation/instance_methods.rb +127 -0
  120. data/lib/alf/renderer/base.rb +72 -0
  121. data/lib/alf/renderer/class_methods.rb +58 -0
  122. data/lib/alf/renderer/rash.rb +19 -0
  123. data/lib/alf/{text.rb → renderer/text.rb} +1 -1
  124. data/lib/alf/tools/coerce.rb +14 -0
  125. data/lib/alf/tools/miscellaneous.rb +77 -0
  126. data/lib/alf/tools/to_lispy.rb +99 -0
  127. data/lib/alf/tools/to_ruby_literal.rb +14 -0
  128. data/lib/alf/tools/tuple_handle.rb +50 -0
  129. data/lib/alf/types/attr_list.rb +56 -0
  130. data/lib/alf/types/attr_name.rb +28 -0
  131. data/lib/alf/types/boolean.rb +12 -0
  132. data/lib/alf/types/heading.rb +96 -0
  133. data/lib/alf/types/ordering.rb +93 -0
  134. data/lib/alf/types/renaming.rb +57 -0
  135. data/lib/alf/types/summarization.rb +76 -0
  136. data/lib/alf/types/tuple_computation.rb +61 -0
  137. data/lib/alf/types/tuple_expression.rb +61 -0
  138. data/lib/alf/types/tuple_predicate.rb +49 -0
  139. data/lib/alf/version.rb +2 -2
  140. data/lib/alf.rb +193 -3714
  141. data/spec/integration/__database__/group.alf +1 -1
  142. data/spec/integration/__database__/suppliers_csv.csv +6 -0
  143. data/spec/integration/command/alf/alf.db +0 -0
  144. data/spec/integration/command/alf/alf_env_sqlite.cmd +1 -0
  145. data/spec/integration/command/alf/alf_env_sqlite.stdout +9 -0
  146. data/spec/integration/command/alf/alf_help.cmd +1 -0
  147. data/spec/integration/command/alf/alf_help.stdout +67 -0
  148. data/spec/integration/command/autonum/autonum_0.cmd +1 -1
  149. data/spec/integration/command/coerce/coerce_1.cmd +1 -0
  150. data/spec/integration/command/coerce/coerce_1.stdout +5 -0
  151. data/spec/integration/command/defaults/defaults_0.cmd +1 -1
  152. data/spec/integration/command/defaults/defaults_0.stdout +9 -9
  153. data/spec/integration/command/defaults/defaults_2.cmd +1 -0
  154. data/spec/integration/command/defaults/defaults_2.stdout +9 -0
  155. data/spec/integration/command/generator/generator_1.cmd +1 -0
  156. data/spec/integration/command/generator/generator_1.stdout +10 -0
  157. data/spec/integration/command/generator/generator_2.cmd +1 -0
  158. data/spec/integration/command/generator/generator_2.stdout +5 -0
  159. data/spec/integration/command/generator/generator_3.cmd +1 -0
  160. data/spec/integration/command/generator/generator_3.stdout +5 -0
  161. data/spec/integration/command/group/group_0.cmd +1 -1
  162. data/spec/integration/command/group/group_1.cmd +1 -1
  163. data/spec/integration/command/help/help_1.cmd +1 -0
  164. data/spec/integration/command/help/help_1.stdout +22 -0
  165. data/spec/integration/command/quota/quota_0.cmd +1 -1
  166. data/spec/integration/command/rank/rank_1.cmd +1 -1
  167. data/spec/integration/command/rank/rank_1.stdout +10 -10
  168. data/spec/integration/command/rank/rank_2.cmd +1 -1
  169. data/spec/integration/command/rank/rank_2.stdout +10 -10
  170. data/spec/integration/command/rank/rank_3.cmd +1 -1
  171. data/spec/integration/command/rank/rank_3.stdout +10 -10
  172. data/spec/integration/command/rank/rank_4.cmd +1 -1
  173. data/spec/integration/command/rank/rank_5.cmd +1 -1
  174. data/spec/integration/command/show/show_csv.cmd +1 -0
  175. data/spec/integration/command/show/show_csv.stdout +6 -0
  176. data/spec/integration/command/show/show_rash_2.cmd +1 -1
  177. data/spec/integration/command/show/show_rash_2.stdout +5 -5
  178. data/spec/integration/command/sort/sort_0.cmd +1 -1
  179. data/spec/integration/command/sort/sort_1.cmd +1 -1
  180. data/spec/integration/command/sort/sort_1.stdout +2 -2
  181. data/spec/integration/command/sort/sort_2.cmd +1 -0
  182. data/spec/integration/command/sort/sort_2.stdout +9 -0
  183. data/spec/integration/command/sort/sort_3.cmd +1 -0
  184. data/spec/integration/command/sort/sort_3.stdout +9 -0
  185. data/spec/integration/command/summarize/summarize_0.cmd +1 -1
  186. data/spec/integration/command/ungroup/ungroup_0.cmd +1 -1
  187. data/spec/integration/command/wrap/wrap_0.cmd +1 -1
  188. data/spec/integration/semantics/test_project.alf +5 -6
  189. data/spec/integration/semantics/test_rank.alf +16 -16
  190. data/spec/integration/test_command.rb +17 -6
  191. data/spec/integration/test_examples.rb +1 -1
  192. data/spec/regression/logs/apache_combined.log +5 -0
  193. data/spec/regression/logs/test_path_attribute.rb +25 -0
  194. data/spec/regression/relation/test_relation_allbut_all.rb +14 -0
  195. data/spec/shared/an_operator_class.rb +10 -5
  196. data/spec/spec_helper.rb +1 -7
  197. data/spec/unit/assumptions/test_set.rb +64 -0
  198. data/spec/unit/command/doc_manager/dynamic.md +1 -0
  199. data/spec/unit/command/doc_manager/example.md +1 -0
  200. data/spec/unit/command/doc_manager/example_1.txt +11 -0
  201. data/spec/unit/command/doc_manager/static.md +1 -0
  202. data/spec/unit/command/doc_manager/test_call.rb +49 -0
  203. data/spec/unit/csv/input.csv +3 -0
  204. data/spec/unit/csv/test_reader.rb +66 -0
  205. data/spec/unit/csv/test_renderer.rb +73 -0
  206. data/spec/unit/lispy/test_relation.rb +37 -0
  207. data/spec/unit/lispy/test_run.rb +40 -0
  208. data/spec/unit/lispy/test_tuple.rb +36 -0
  209. data/spec/unit/logs/apache_combined.log +5 -0
  210. data/spec/unit/logs/postgresql.log +29 -0
  211. data/spec/unit/logs/test_reader.rb +56 -0
  212. data/spec/unit/operator/non_relational/compact/{buffer_based.rb → test_buffer_based.rb} +0 -0
  213. data/spec/unit/operator/non_relational/test_clip.rb +1 -1
  214. data/spec/unit/operator/non_relational/test_coerce.rb +35 -0
  215. data/spec/unit/operator/non_relational/test_defaults.rb +15 -2
  216. data/spec/unit/operator/non_relational/test_generator.rb +78 -0
  217. data/spec/unit/operator/relational/join/test_hash_based.rb +4 -4
  218. data/spec/unit/operator/relational/matching/test_hash_based.rb +6 -6
  219. data/spec/unit/operator/relational/not_matching/test_hash_based.rb +4 -4
  220. data/spec/unit/operator/relational/summarize/test_hash_based.rb +10 -6
  221. data/spec/unit/operator/relational/summarize/test_sort_based.rb +18 -7
  222. data/spec/unit/operator/relational/test_group.rb +8 -8
  223. data/spec/unit/operator/relational/test_intersect.rb +3 -3
  224. data/spec/unit/operator/relational/test_minus.rb +3 -3
  225. data/spec/unit/operator/relational/test_project.rb +12 -2
  226. data/spec/unit/operator/relational/test_quota.rb +5 -6
  227. data/spec/unit/operator/relational/test_summarize.rb +9 -11
  228. data/spec/unit/operator/relational/test_union.rb +1 -1
  229. data/spec/unit/operator/relational/test_wrap.rb +1 -1
  230. data/spec/unit/operator/signature/test_collect_on.rb +45 -0
  231. data/spec/unit/operator/signature/test_initialize.rb +17 -0
  232. data/spec/unit/operator/signature/test_install.rb +56 -0
  233. data/spec/unit/operator/signature/test_option_parser.rb +36 -0
  234. data/spec/unit/operator/signature/test_parse_args.rb +60 -0
  235. data/spec/unit/operator/signature/test_parse_argv.rb +87 -0
  236. data/spec/unit/operator/signature/test_to_lispy.rb +102 -0
  237. data/spec/unit/operator/signature/test_to_shell.rb +103 -0
  238. data/spec/unit/operator/test_non_relational.rb +3 -1
  239. data/spec/unit/relation/test_relops.rb +20 -15
  240. data/spec/unit/sequel/alf.db +0 -0
  241. data/spec/unit/sequel/test_environment.rb +54 -0
  242. data/spec/unit/test_aggregator.rb +32 -22
  243. data/spec/unit/test_environment.rb +5 -0
  244. data/spec/unit/test_lispy.rb +4 -0
  245. data/spec/unit/test_relation.rb +5 -0
  246. data/spec/unit/text/test_cell.rb +6 -6
  247. data/spec/unit/text/test_row.rb +3 -3
  248. data/spec/unit/text/test_table.rb +6 -6
  249. data/spec/unit/tools/test_coalesce.rb +15 -0
  250. data/spec/unit/tools/test_coerce.rb +10 -0
  251. data/spec/unit/tools/test_to_lispy.rb +138 -0
  252. data/spec/unit/tools/test_to_ruby_literal.rb +10 -0
  253. data/spec/unit/tools/test_tuple_handle.rb +1 -59
  254. data/spec/unit/types/test_attr_list.rb +106 -0
  255. data/spec/unit/types/test_attr_name.rb +52 -0
  256. data/spec/unit/{test_heading.rb → types/test_heading.rb} +10 -0
  257. data/spec/unit/types/test_ordering.rb +127 -0
  258. data/spec/unit/types/test_renaming.rb +55 -0
  259. data/spec/unit/types/test_summarization.rb +63 -0
  260. data/spec/unit/types/test_tuple_computation.rb +60 -0
  261. data/spec/unit/types/test_tuple_expression.rb +64 -0
  262. data/spec/unit/types/test_tuple_predicate.rb +79 -0
  263. data/tasks/debug_mail.rake +1 -1
  264. data/tasks/debug_mail.txt +5 -0
  265. data/tasks/gh-pages.rake +63 -0
  266. metadata +325 -52
  267. data/spec/unit/operator/test_command_methods.rb +0 -38
  268. data/spec/unit/tools/test_ordering_key.rb +0 -94
  269. data/spec/unit/tools/test_parse_commandline_args.rb +0 -47
  270. data/spec/unit/tools/test_projection_key.rb +0 -83
@@ -0,0 +1,37 @@
1
+ module Alf
2
+ class Environment
3
+ module Base
4
+
5
+ #
6
+ # Returns a dataset whose name is provided.
7
+ #
8
+ # This method resolves named datasets to tuple enumerables. When the
9
+ # dataset exists, this method must return an Iterator, typically a
10
+ # Reader instance. Otherwise, it must raise a NoSuchDatasetError.
11
+ #
12
+ # @param [Symbol] name the name of a dataset
13
+ # @return [Iterator] an iterator, typically a Reader instance
14
+ # @raise [NoSuchDatasetError] when the dataset does not exist
15
+ #
16
+ def dataset(name)
17
+ end
18
+ undef :dataset
19
+
20
+ #
21
+ # Branches this environment and puts some additional explicit
22
+ # definitions.
23
+ #
24
+ # This method is provided for (with ...) expressions and should not
25
+ # be overridden by subclasses.
26
+ #
27
+ # @param [Hash] defs a set of (name, Iterator) pairs.
28
+ # @return [Environment] an environment instance with new definitions set
29
+ #
30
+ def branch(defs)
31
+ Explicit.new(defs, self)
32
+ end
33
+
34
+ end # module Base
35
+ include(Base)
36
+ end # class Environment
37
+ end # module Alf
@@ -0,0 +1,93 @@
1
+ module Alf
2
+ class Environment
3
+ module ClassMethods
4
+
5
+ #
6
+ # Returns registered environments
7
+ #
8
+ def environments
9
+ @environments ||= []
10
+ end
11
+
12
+ #
13
+ # Register an environment class under a specific name.
14
+ #
15
+ # Registered class must implement a recognizes? method that takes an array
16
+ # of arguments; it must return true if an environment instance can be built
17
+ # using those arguments, false otherwise. Please be very specific in the
18
+ # implementation for returning true. See also autodetect and recognizes?
19
+ #
20
+ # @param [Symbol] name name of the environment kind
21
+ # @param [Class] clazz class that implements the environment
22
+ #
23
+ def register(name, clazz)
24
+ environments << [name, clazz]
25
+ (class << self; self; end).
26
+ send(:define_method, name) do |*args|
27
+ clazz.new(*args)
28
+ end
29
+ end
30
+
31
+ #
32
+ # Auto-detect the environment to use for specific arguments.
33
+ #
34
+ # This method returns an instance of the first registered Environment class
35
+ # that returns true to an invocation of recognizes?(args). It raises an
36
+ # ArgumentError if no such class can be found.
37
+ #
38
+ # @return [Environment] an environment instance
39
+ # @raise [ArgumentError] when no registered class recognizes the arguments
40
+ #
41
+ def autodetect(*args)
42
+ if (args.size == 1) && args.first.is_a?(Environment)
43
+ return args.first
44
+ else
45
+ environments.each do |name,clazz|
46
+ return clazz.new(*args) if clazz.recognizes?(args)
47
+ end
48
+ end
49
+ raise ArgumentError, "Unable to auto-detect Environment with #{args.inspect}"
50
+ end
51
+
52
+ #
53
+ # (see Environment.autodetect)
54
+ #
55
+ def coerce(*args)
56
+ autodetect(*args)
57
+ end
58
+
59
+ #
60
+ # Returns true if _args_ can be used for building an environment instance,
61
+ # false otherwise.
62
+ #
63
+ # When returning true, an immediate invocation of new(*args) should
64
+ # succeed. While runtime exceptions are admitted (no such database, for
65
+ # example), argument errors should not occur (missing argument, wrong
66
+ # typing, etc.).
67
+ #
68
+ # Please be specific in the implementation of this extension point, as
69
+ # registered environments form a chain and each of them should have a
70
+ # chance of being selected.
71
+ #
72
+ def recognizes?(args)
73
+ false
74
+ end
75
+
76
+ #
77
+ # Returns the default environment
78
+ #
79
+ def default
80
+ examples
81
+ end
82
+
83
+ #
84
+ # Returns the examples environment
85
+ #
86
+ def examples
87
+ folder File.expand_path('../../../../examples/operators', __FILE__)
88
+ end
89
+
90
+ end # module ClassMethods
91
+ extend(ClassMethods)
92
+ end # class Environment
93
+ end # module Alf
@@ -0,0 +1,38 @@
1
+ module Alf
2
+ class Environment
3
+ #
4
+ # Specialization of Environment that works with explicitly defined
5
+ # datasources and allows branching and unbranching.
6
+ #
7
+ class Explicit < Environment
8
+
9
+ #
10
+ # Creates a new environment instance with initial definitions
11
+ # and optional child environment.
12
+ #
13
+ def initialize(defs = {}, child = nil)
14
+ @defs = defs
15
+ @child = child
16
+ end
17
+
18
+ #
19
+ # Unbranches this environment and returns its child
20
+ #
21
+ def unbranch
22
+ @child
23
+ end
24
+
25
+ # (see Environment#dataset)
26
+ def dataset(name)
27
+ if @defs.has_key?(name)
28
+ @defs[name]
29
+ elsif @child
30
+ @child.dataset(name)
31
+ else
32
+ raise "No such dataset #{name}"
33
+ end
34
+ end
35
+
36
+ end # class Explicit
37
+ end # class Environment
38
+ end # module Alf
@@ -0,0 +1,62 @@
1
+ module Alf
2
+ class Environment
3
+ #
4
+ # Specialization of Environment to work on files of a given folder.
5
+ #
6
+ # This kind of environment resolves datasets by simply looking at
7
+ # recognized files in a specific folder. "Recognized" files are simply
8
+ # those for which a Reader subclass has been previously registered.
9
+ # This environment then serves reader instances.
10
+ #
11
+ class Folder < Environment
12
+
13
+ #
14
+ # (see Environment.recognizes?)
15
+ #
16
+ # Returns true if args contains only a String which is an existing
17
+ # folder.
18
+ #
19
+ def self.recognizes?(args)
20
+ (args.size == 1) &&
21
+ args.first.is_a?(String) &&
22
+ File.directory?(args.first.to_s)
23
+ end
24
+
25
+ #
26
+ # Creates an environment instance, wired to the specified folder.
27
+ #
28
+ # @param [String] folder path to the folder to use as dataset source
29
+ #
30
+ def initialize(folder)
31
+ @folder = folder
32
+ end
33
+
34
+ # (see Environment#dataset)
35
+ def dataset(name)
36
+ if file = find_file(name)
37
+ Reader.reader(file, self)
38
+ else
39
+ raise "No such dataset #{name} (#{@folder})"
40
+ end
41
+ end
42
+
43
+ protected
44
+
45
#
# Locates the file that backs the dataset called +name+.
#
# Candidates are tried in this order; the first hit wins:
#
# 1. +name+ itself, taken as a path (any existing filesystem entry);
# 2. +name+ joined to the environment folder, if it is a regular file;
# 3. the first regular file of the folder matching "<name>.*".
#
# @param [#to_s] name a dataset name or a file path
# @return [String, nil] the path of the file found, nil when no candidate
#         matches
#
def find_file(name)
  # TODO: refactor this, because it allows getting out of the folder
  path = name.to_s

  # File.exist? is the supported spelling: File.exists? was deprecated and
  # then removed in Ruby 3.2, where calling it raises NoMethodError.
  return path if File.exist?(path)

  # File.file? is false for missing entries, so no separate existence
  # check is needed here.
  explicit = File.join(@folder, path)
  return explicit if File.file?(explicit)

  # Fall back on "<name>.<any extension>", ignoring directories.
  Dir[File.join(@folder, "#{name}.*")].find { |f| File.file?(f) }
end
58
+
59
+ Environment.register(:folder, self)
60
+ end # class Folder
61
+ end # class Environment
62
+ end # module Alf
@@ -0,0 +1,104 @@
1
+ module Alf
2
+ module CSV
3
+
4
+ #
5
+ # Provides common tooling to CSV renderer and readers
6
+ #
7
+ module Commons
8
+
9
+ DEFAULT_OPTIONS = {
10
+ :headers => true
11
+ }
12
+
13
+ private
14
+
15
+ #
16
+ # Returns CSV in ruby 1.9 and FasterCSV for ruby < 1.9.
17
+ # This method handles require as well.
18
+ #
19
+ def get_csv_class
20
+ if RUBY_VERSION >= "1.9"
21
+ require 'csv'
22
+ ::CSV
23
+ else
24
+ ::Alf::Tools::friendly_require('fastercsv')
25
+ ::FasterCSV
26
+ end
27
+ end
28
+
29
+ #
30
+ # Returns a CSV instance bound to a given io and options
31
+ #
32
+ def get_csv(io)
33
+ get_csv_class.new(io, options)
34
+ end
35
+
36
+ end # module Commons
37
+
38
+ #
39
+ # Implements Alf::Renderer contract for outputting CSV files.
40
+ #
41
+ class Renderer < Alf::Renderer
42
+ include CSV::Commons
43
+
44
+ protected
45
+
46
+ # (see Renderer#render)
47
+ def render(input, output)
48
+ csv = get_csv(output)
49
+ header = nil
50
+ input.each do |tuple|
51
+ unless header
52
+ header = extract_header(tuple)
53
+ csv << header.collect{|k| k.to_s}
54
+ end
55
+ csv << extract_row(tuple, header)
56
+ end
57
+ output
58
+ end
59
+
60
+ private
61
+
62
+ def extract_header(tuple)
63
+ tuple.keys
64
+ end
65
+
66
+ def extract_row(tuple, header)
67
+ header.collect{|k| tuple[k]}
68
+ end
69
+
70
+ ::Alf::Renderer.register(:csv, "as a csv file", self)
71
+ end # class Renderer
72
+
73
+ #
74
+ # Implements Alf::Reader contract for reading CSV files.
75
+ #
76
+ class Reader < Alf::Reader
77
+ include CSV::Commons
78
+
79
+ def each
80
+ with_input_io do |io|
81
+ block = Proc.new{|row|
82
+ next if row.header_row?
83
+ yield(symbolize_keys(row.to_hash))
84
+ }
85
+ case io
86
+ when StringIO
87
+ get_csv_class.parse(io.string, options, &block)
88
+ else
89
+ get_csv(io).each(&block)
90
+ end
91
+ end
92
+ end
93
+
94
+ private
95
+
96
+ def symbolize_keys(h)
97
+ Hash[h.collect{|k,v| [k.to_sym,v] }]
98
+ end
99
+
100
+ ::Alf::Reader.register(:csv, [".csv"], self)
101
+ end # class Reader
102
+
103
+ end # module CSV
104
+ end # module Alf
@@ -0,0 +1,100 @@
1
+ module Alf
2
+ module Logs
3
+
4
+ #
5
+ # Implements Alf::Reader contract for reading log files.
6
+ #
7
+ class Reader < Alf::Reader
8
+
9
+ DEFAULT_OPTIONS = {
10
+ :file_format => nil,
11
+ :line_def => :access
12
+ }
13
+
14
+ attr_reader :options
15
+
16
+ def initialize(*args)
17
+ Alf::Tools::friendly_require('request_log_analyzer')
18
+ super(*args)
19
+ @options[:file_format] = coerce_file_format(@options[:file_format])
20
+ end
21
+
22
+ def each
23
+ parser = infer_parser(input_path)
24
+ with_input_io do |io|
25
+ parser.parse_stream(io) do |req|
26
+ yield request_to_tuple(req)
27
+ end
28
+ end
29
+ end
30
+
31
+ private
32
+
33
+ def coerce_file_format(file_format)
34
+ case file_format
35
+ when NilClass
36
+ nil
37
+ when RequestLogAnalyzer::FileFormat
38
+ file_format
39
+ when Symbol
40
+ RequestLogAnalyzer::FileFormat.load(file_format)
41
+ when Array
42
+ RequestLogAnalyzer::FileFormat.load(*file_format)
43
+ else
44
+ raise ArgumentError, "Invalid file format: #{file_format}"
45
+ end
46
+ end
47
+
48
+ def infer_parser(path)
49
+ file_format = @options[:file_format] || begin
50
+ unless path
51
+ raise NotImplementedError, "Logs::Reader does not work on IO for now"
52
+ end
53
+ RequestLogAnalyzer::FileFormat.autodetect(path)
54
+ end
55
+ RequestLogAnalyzer::Source::LogParser.new(file_format)
56
+ end
57
+
58
+ def request_to_tuple(req)
59
+ req.attributes
60
+ end
61
+
62
+ LOG_TYPES = {
63
+ # RequestLogAnalyzer::Request::Converters
64
+ :string => String,
65
+ :float => Float,
66
+ :decimal => Float,
67
+ :int => Integer,
68
+ :integer => Integer,
69
+ :sym => Symbol,
70
+ :symbol => Symbol,
71
+ :timestamp => Integer,
72
+ :traffic => Integer,
73
+ :duration => Float,
74
+ :epoch => Integer,
75
+ # AmazonS3
76
+ :nillable_string => String,
77
+ :referer => String,
78
+ :user_agent => String,
79
+ # Apache
80
+ :path => String,
81
+ # MySQL
82
+ :sql => String
83
+ }
84
+
85
+ def infer_heading(format, line_def = :access)
86
+ h = Hash[format.line_definitions[line_def].captures.collect{|capt|
87
+ [ capt[:name], to_type(capt[:type]) ]
88
+ }]
89
+ Alf::Heading.new(h)
90
+ end
91
+
92
+ def to_type(log_type)
93
+ LOG_TYPES[log_type] || String
94
+ end
95
+
96
+ ::Alf::Reader.register(:logs, [".log"], self)
97
+ end # class Reader
98
+
99
+ end # module Logs
100
+ end # module Alf
@@ -0,0 +1,77 @@
1
+ module Alf
2
+ module Sequel
3
+
4
+ #
5
+ # Specialization of Alf::Environment to distribute Sequel datasets
6
+ #
7
+ class Environment < ::Alf::Environment
8
+
9
+ #
10
+ # (see Alf::Environment.recognizes?)
11
+ #
12
+ # Returns true if args contains one String that can be interpreted as
13
+ # a valid database uri.
14
+ #
15
+ def self.recognizes?(args)
16
+ require 'uri'
17
+ return false unless (args.size == 1) && args.first.is_a?(String)
18
+ uri = URI::parse(args.first)
19
+ if uri.scheme || looks_a_sqlite_file?(args.first)
20
+ true
21
+ else
22
+ false
23
+ end
24
+ rescue ::URI::Error
25
+ false
26
+ end
27
+
28
+ def self.looks_a_sqlite_file?(f)
29
+ (File.file?(f) && File.extname(f).==(".db"))
30
+ end
31
+
32
+ # Creates an Environment instance
33
+ def initialize(uri, options = {})
34
+ @uri = self.class.looks_a_sqlite_file?(uri) ? "sqlite://#{uri}" : uri
35
+ @options = options
36
+ end
37
+
38
+ #
39
+ # (see Alf::Environment#dataset)
40
+ #
41
+ def dataset(name)
42
+ Iterator.new(connect[name])
43
+ end
44
+
45
+ private
46
+
47
+ # Creates a database connection
48
+ def connect
49
+ Alf::Tools::friendly_require('sequel')
50
+ @db ||= ::Sequel.connect(@uri, @options)
51
+ end
52
+
53
+ ::Alf::Environment.register(:sequel, self)
54
+ end # class Environment
55
+
56
+ # Specialization of Alf::Iterator to work on a Sequel dataset
57
+ class Iterator
58
+ include ::Alf::Iterator
59
+
60
+ def initialize(dataset)
61
+ @dataset = dataset
62
+ end
63
+
64
# (see Alf::Iterator#each)
#
# Forwards the given block to the wrapped dataset's own +each+ and
# returns whatever that call returns.
#
# The block is captured through an explicit +&block+ parameter. The
# block-less +Proc.new+ idiom used before raises ArgumentError on
# Ruby 3.0 and later.
def each(&block)
  @dataset.each(&block)
end
68
+
69
+ # (see Alf::Iterator#pipe)
70
+ def pipe(input, env = nil)
71
+ self
72
+ end
73
+
74
+ end # class Iterator
75
+
76
+ end # module Sequel
77
+ end # module Alf
File without changes
data/lib/alf/extra.rb ADDED
@@ -0,0 +1,5 @@
1
+ require 'alf/extra/yaml'
2
+ require 'alf/extra/csv'
3
+ require 'alf/extra/logs'
4
+ require 'alf/extra/sequel'
5
+
@@ -0,0 +1,38 @@
1
+ module Alf
2
+ module Iterator
3
+ module Base
4
+
5
+ #
6
+ # Wire the iterator input and an optional execution environment.
7
+ #
8
+ # Iterators (typically Reader and Operator instances) work from input data
9
+ # that come from files, or other operators, and so on. This method wires
10
+ # this input data to the iterator. Wiring is required before any attempt
11
+ # to call each, unless autowiring occurs at construction. The exact kind of
12
+ # input object is left at discretion of Iterator implementations.
13
+ #
14
+ # @param [Object] input the iterator input, at discretion of the Iterator
15
+ # implementation.
16
+ # @param [Environment] environment an optional environment for resolving
17
+ # named datasets if needed.
18
+ # @return [Object] self
19
+ #
20
+ def pipe(input, environment = nil)
21
+ self
22
+ end
23
+ undef :pipe
24
+
25
+ #
26
+ # Converts this iterator to an in-memory Relation.
27
+ #
28
+ # @return [Relation] a relation instance, as the set of tuples
29
+ # that would be yielded by this iterator.
30
+ #
31
+ def to_rel
32
+ Relation::coerce(self)
33
+ end
34
+
35
+ end # module Base
36
+ include(Base)
37
+ end # module Iterator
38
+ end # module Alf
@@ -0,0 +1,22 @@
1
+ module Alf
2
+ module Iterator
3
+ module ClassMethods
4
+
5
+ #
6
+ # Coerces something to an iterator
7
+ #
8
+ def coerce(arg, environment = nil)
9
+ case arg
10
+ when Iterator, Array
11
+ arg
12
+ when String, Symbol
13
+ Proxy.new(environment, arg.to_sym)
14
+ else
15
+ Reader.coerce(arg, environment)
16
+ end
17
+ end
18
+
19
+ end # module ClassMethods
20
+ extend(ClassMethods)
21
+ end # module Iterator
22
+ end # module Alf
@@ -0,0 +1,33 @@
1
+ module Alf
2
+ module Iterator
3
+ class Proxy
4
+ include Iterator
5
+
6
+ # @return [Symbol] name of the dataset to request from the environment
7
+ attr_reader :dataset
8
+
9
+ #
10
+ # Creates a proxy instance.
11
+ #
12
+ # @param [Environment] env the environment serving iterator instances
13
+ # @param [Symbol] dataset named dataset to rely on
14
+ #
15
+ def initialize(env, dataset)
16
+ @environment, @dataset = env, dataset
17
+ end
18
+
19
+ # (see Iterator#pipe)
20
+ def pipe(input, environment = nil)
21
+ @environment ||= environment
22
+ @dataset ||= input
23
+ self
24
+ end
25
+
26
+ # (see Iterator#each)
27
+ def each(&block)
28
+ @environment.dataset(@dataset).each(&block)
29
+ end
30
+
31
+ end # class Proxy
32
+ end # module Iterator
33
+ end # module Alf