alf 0.9.3 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (270) hide show
  1. data/CHANGELOG.md +255 -129
  2. data/Gemfile +31 -1
  3. data/Gemfile.lock +17 -20
  4. data/LICENCE.md +1 -1
  5. data/Manifest.txt +2 -0
  6. data/README.md +37 -43
  7. data/TODO.md +1 -1
  8. data/alf.gemspec +10 -7
  9. data/alf.noespec +24 -13
  10. data/bin/alf +2 -2
  11. data/doc/commands/exec.md +16 -0
  12. data/doc/commands/help.md +11 -0
  13. data/doc/commands/main.md +33 -0
  14. data/doc/commands/show.md +19 -0
  15. data/doc/operators/non_relational/autonum.md +23 -0
  16. data/doc/operators/non_relational/clip.md +31 -0
  17. data/doc/operators/non_relational/coerce.md +15 -0
  18. data/doc/operators/non_relational/compact.md +20 -0
  19. data/doc/operators/non_relational/defaults.md +32 -0
  20. data/doc/operators/non_relational/generator.md +20 -0
  21. data/doc/operators/non_relational/sort.md +24 -0
  22. data/doc/operators/relational/extend.md +18 -0
  23. data/doc/operators/relational/group.md +27 -0
  24. data/doc/operators/relational/intersect.md +13 -0
  25. data/doc/operators/relational/join.md +27 -0
  26. data/doc/operators/relational/matching.md +20 -0
  27. data/doc/operators/relational/minus.md +12 -0
  28. data/doc/operators/relational/not-matching.md +20 -0
  29. data/doc/operators/relational/project.md +28 -0
  30. data/doc/operators/relational/quota.md +21 -0
  31. data/doc/operators/relational/rank.md +27 -0
  32. data/doc/operators/relational/rename.md +17 -0
  33. data/doc/operators/relational/restrict.md +25 -0
  34. data/doc/operators/relational/summarize.md +25 -0
  35. data/doc/operators/relational/ungroup.md +20 -0
  36. data/doc/operators/relational/union.md +14 -0
  37. data/doc/operators/relational/unwrap.md +20 -0
  38. data/doc/operators/relational/wrap.md +24 -0
  39. data/examples/csv/suppliers.csv +6 -0
  40. data/examples/logs/access.log +1000 -0
  41. data/examples/logs/combined.alf +2 -0
  42. data/examples/logs/hits.alf +14 -0
  43. data/examples/logs/not_found.alf +7 -0
  44. data/examples/logs/robots-cheating.alf +11 -0
  45. data/examples/logs/robots.alf +8 -0
  46. data/examples/northwind/customers.csv +92 -0
  47. data/examples/northwind/northwind.db +0 -0
  48. data/examples/northwind/orders.csv +831 -0
  49. data/examples/operators/clip.alf +1 -1
  50. data/examples/operators/database.alf +5 -6
  51. data/examples/operators/defaults.alf +1 -1
  52. data/examples/operators/group.alf +1 -1
  53. data/examples/operators/project.alf +2 -1
  54. data/examples/operators/pseudo-with.alf +2 -2
  55. data/examples/operators/quota.alf +2 -2
  56. data/examples/operators/summarize.alf +2 -2
  57. data/lib/alf/aggregator/aggregators.rb +77 -0
  58. data/lib/alf/aggregator/base.rb +95 -0
  59. data/lib/alf/aggregator/class_methods.rb +57 -0
  60. data/lib/alf/buffer/sorted.rb +48 -0
  61. data/lib/alf/command/class_methods.rb +27 -0
  62. data/lib/alf/command/doc_manager.rb +72 -0
  63. data/lib/alf/command/exec.rb +12 -0
  64. data/lib/alf/command/help.rb +31 -0
  65. data/lib/alf/command/main.rb +146 -0
  66. data/lib/alf/command/show.rb +33 -0
  67. data/lib/alf/environment/base.rb +37 -0
  68. data/lib/alf/environment/class_methods.rb +93 -0
  69. data/lib/alf/environment/explicit.rb +38 -0
  70. data/lib/alf/environment/folder.rb +62 -0
  71. data/lib/alf/extra/csv.rb +104 -0
  72. data/lib/alf/extra/logs.rb +100 -0
  73. data/lib/alf/extra/sequel.rb +77 -0
  74. data/lib/alf/{yaml.rb → extra/yaml.rb} +0 -0
  75. data/lib/alf/extra.rb +5 -0
  76. data/lib/alf/iterator/base.rb +38 -0
  77. data/lib/alf/iterator/class_methods.rb +22 -0
  78. data/lib/alf/iterator/proxy.rb +33 -0
  79. data/lib/alf/lispy/instance_methods.rb +157 -0
  80. data/lib/alf/operator/base.rb +74 -0
  81. data/lib/alf/operator/binary.rb +32 -0
  82. data/lib/alf/operator/cesure.rb +45 -0
  83. data/lib/alf/operator/class_methods.rb +132 -0
  84. data/lib/alf/operator/experimental.rb +9 -0
  85. data/lib/alf/operator/non_relational/autonum.rb +24 -0
  86. data/lib/alf/operator/non_relational/clip.rb +20 -0
  87. data/lib/alf/operator/non_relational/coerce.rb +21 -0
  88. data/lib/alf/operator/non_relational/compact.rb +62 -0
  89. data/lib/alf/operator/non_relational/defaults.rb +25 -0
  90. data/lib/alf/operator/non_relational/generator.rb +38 -0
  91. data/lib/alf/operator/non_relational/sort.rb +23 -0
  92. data/lib/alf/operator/nullary.rb +20 -0
  93. data/lib/alf/operator/relational/extend.rb +24 -0
  94. data/lib/alf/operator/relational/group.rb +32 -0
  95. data/lib/alf/operator/relational/intersect.rb +37 -0
  96. data/lib/alf/operator/relational/join.rb +106 -0
  97. data/lib/alf/operator/relational/matching.rb +45 -0
  98. data/lib/alf/operator/relational/minus.rb +37 -0
  99. data/lib/alf/operator/relational/not_matching.rb +45 -0
  100. data/lib/alf/operator/relational/project.rb +22 -0
  101. data/lib/alf/operator/relational/quota.rb +51 -0
  102. data/lib/alf/operator/relational/rank.rb +55 -0
  103. data/lib/alf/operator/relational/rename.rb +19 -0
  104. data/lib/alf/operator/relational/restrict.rb +20 -0
  105. data/lib/alf/operator/relational/summarize.rb +83 -0
  106. data/lib/alf/operator/relational/ungroup.rb +25 -0
  107. data/lib/alf/operator/relational/union.rb +32 -0
  108. data/lib/alf/operator/relational/unwrap.rb +21 -0
  109. data/lib/alf/operator/relational/wrap.rb +22 -0
  110. data/lib/alf/operator/shortcut.rb +53 -0
  111. data/lib/alf/operator/signature.rb +262 -0
  112. data/lib/alf/operator/transform.rb +27 -0
  113. data/lib/alf/operator/unary.rb +38 -0
  114. data/lib/alf/reader/alf_file.rb +24 -0
  115. data/lib/alf/reader/base.rb +119 -0
  116. data/lib/alf/reader/class_methods.rb +82 -0
  117. data/lib/alf/reader/rash.rb +28 -0
  118. data/lib/alf/relation/class_methods.rb +37 -0
  119. data/lib/alf/relation/instance_methods.rb +127 -0
  120. data/lib/alf/renderer/base.rb +72 -0
  121. data/lib/alf/renderer/class_methods.rb +58 -0
  122. data/lib/alf/renderer/rash.rb +19 -0
  123. data/lib/alf/{text.rb → renderer/text.rb} +1 -1
  124. data/lib/alf/tools/coerce.rb +14 -0
  125. data/lib/alf/tools/miscellaneous.rb +77 -0
  126. data/lib/alf/tools/to_lispy.rb +99 -0
  127. data/lib/alf/tools/to_ruby_literal.rb +14 -0
  128. data/lib/alf/tools/tuple_handle.rb +50 -0
  129. data/lib/alf/types/attr_list.rb +56 -0
  130. data/lib/alf/types/attr_name.rb +28 -0
  131. data/lib/alf/types/boolean.rb +12 -0
  132. data/lib/alf/types/heading.rb +96 -0
  133. data/lib/alf/types/ordering.rb +93 -0
  134. data/lib/alf/types/renaming.rb +57 -0
  135. data/lib/alf/types/summarization.rb +76 -0
  136. data/lib/alf/types/tuple_computation.rb +61 -0
  137. data/lib/alf/types/tuple_expression.rb +61 -0
  138. data/lib/alf/types/tuple_predicate.rb +49 -0
  139. data/lib/alf/version.rb +2 -2
  140. data/lib/alf.rb +193 -3714
  141. data/spec/integration/__database__/group.alf +1 -1
  142. data/spec/integration/__database__/suppliers_csv.csv +6 -0
  143. data/spec/integration/command/alf/alf.db +0 -0
  144. data/spec/integration/command/alf/alf_env_sqlite.cmd +1 -0
  145. data/spec/integration/command/alf/alf_env_sqlite.stdout +9 -0
  146. data/spec/integration/command/alf/alf_help.cmd +1 -0
  147. data/spec/integration/command/alf/alf_help.stdout +67 -0
  148. data/spec/integration/command/autonum/autonum_0.cmd +1 -1
  149. data/spec/integration/command/coerce/coerce_1.cmd +1 -0
  150. data/spec/integration/command/coerce/coerce_1.stdout +5 -0
  151. data/spec/integration/command/defaults/defaults_0.cmd +1 -1
  152. data/spec/integration/command/defaults/defaults_0.stdout +9 -9
  153. data/spec/integration/command/defaults/defaults_2.cmd +1 -0
  154. data/spec/integration/command/defaults/defaults_2.stdout +9 -0
  155. data/spec/integration/command/generator/generator_1.cmd +1 -0
  156. data/spec/integration/command/generator/generator_1.stdout +10 -0
  157. data/spec/integration/command/generator/generator_2.cmd +1 -0
  158. data/spec/integration/command/generator/generator_2.stdout +5 -0
  159. data/spec/integration/command/generator/generator_3.cmd +1 -0
  160. data/spec/integration/command/generator/generator_3.stdout +5 -0
  161. data/spec/integration/command/group/group_0.cmd +1 -1
  162. data/spec/integration/command/group/group_1.cmd +1 -1
  163. data/spec/integration/command/help/help_1.cmd +1 -0
  164. data/spec/integration/command/help/help_1.stdout +22 -0
  165. data/spec/integration/command/quota/quota_0.cmd +1 -1
  166. data/spec/integration/command/rank/rank_1.cmd +1 -1
  167. data/spec/integration/command/rank/rank_1.stdout +10 -10
  168. data/spec/integration/command/rank/rank_2.cmd +1 -1
  169. data/spec/integration/command/rank/rank_2.stdout +10 -10
  170. data/spec/integration/command/rank/rank_3.cmd +1 -1
  171. data/spec/integration/command/rank/rank_3.stdout +10 -10
  172. data/spec/integration/command/rank/rank_4.cmd +1 -1
  173. data/spec/integration/command/rank/rank_5.cmd +1 -1
  174. data/spec/integration/command/show/show_csv.cmd +1 -0
  175. data/spec/integration/command/show/show_csv.stdout +6 -0
  176. data/spec/integration/command/show/show_rash_2.cmd +1 -1
  177. data/spec/integration/command/show/show_rash_2.stdout +5 -5
  178. data/spec/integration/command/sort/sort_0.cmd +1 -1
  179. data/spec/integration/command/sort/sort_1.cmd +1 -1
  180. data/spec/integration/command/sort/sort_1.stdout +2 -2
  181. data/spec/integration/command/sort/sort_2.cmd +1 -0
  182. data/spec/integration/command/sort/sort_2.stdout +9 -0
  183. data/spec/integration/command/sort/sort_3.cmd +1 -0
  184. data/spec/integration/command/sort/sort_3.stdout +9 -0
  185. data/spec/integration/command/summarize/summarize_0.cmd +1 -1
  186. data/spec/integration/command/ungroup/ungroup_0.cmd +1 -1
  187. data/spec/integration/command/wrap/wrap_0.cmd +1 -1
  188. data/spec/integration/semantics/test_project.alf +5 -6
  189. data/spec/integration/semantics/test_rank.alf +16 -16
  190. data/spec/integration/test_command.rb +17 -6
  191. data/spec/integration/test_examples.rb +1 -1
  192. data/spec/regression/logs/apache_combined.log +5 -0
  193. data/spec/regression/logs/test_path_attribute.rb +25 -0
  194. data/spec/regression/relation/test_relation_allbut_all.rb +14 -0
  195. data/spec/shared/an_operator_class.rb +10 -5
  196. data/spec/spec_helper.rb +1 -7
  197. data/spec/unit/assumptions/test_set.rb +64 -0
  198. data/spec/unit/command/doc_manager/dynamic.md +1 -0
  199. data/spec/unit/command/doc_manager/example.md +1 -0
  200. data/spec/unit/command/doc_manager/example_1.txt +11 -0
  201. data/spec/unit/command/doc_manager/static.md +1 -0
  202. data/spec/unit/command/doc_manager/test_call.rb +49 -0
  203. data/spec/unit/csv/input.csv +3 -0
  204. data/spec/unit/csv/test_reader.rb +66 -0
  205. data/spec/unit/csv/test_renderer.rb +73 -0
  206. data/spec/unit/lispy/test_relation.rb +37 -0
  207. data/spec/unit/lispy/test_run.rb +40 -0
  208. data/spec/unit/lispy/test_tuple.rb +36 -0
  209. data/spec/unit/logs/apache_combined.log +5 -0
  210. data/spec/unit/logs/postgresql.log +29 -0
  211. data/spec/unit/logs/test_reader.rb +56 -0
  212. data/spec/unit/operator/non_relational/compact/{buffer_based.rb → test_buffer_based.rb} +0 -0
  213. data/spec/unit/operator/non_relational/test_clip.rb +1 -1
  214. data/spec/unit/operator/non_relational/test_coerce.rb +35 -0
  215. data/spec/unit/operator/non_relational/test_defaults.rb +15 -2
  216. data/spec/unit/operator/non_relational/test_generator.rb +78 -0
  217. data/spec/unit/operator/relational/join/test_hash_based.rb +4 -4
  218. data/spec/unit/operator/relational/matching/test_hash_based.rb +6 -6
  219. data/spec/unit/operator/relational/not_matching/test_hash_based.rb +4 -4
  220. data/spec/unit/operator/relational/summarize/test_hash_based.rb +10 -6
  221. data/spec/unit/operator/relational/summarize/test_sort_based.rb +18 -7
  222. data/spec/unit/operator/relational/test_group.rb +8 -8
  223. data/spec/unit/operator/relational/test_intersect.rb +3 -3
  224. data/spec/unit/operator/relational/test_minus.rb +3 -3
  225. data/spec/unit/operator/relational/test_project.rb +12 -2
  226. data/spec/unit/operator/relational/test_quota.rb +5 -6
  227. data/spec/unit/operator/relational/test_summarize.rb +9 -11
  228. data/spec/unit/operator/relational/test_union.rb +1 -1
  229. data/spec/unit/operator/relational/test_wrap.rb +1 -1
  230. data/spec/unit/operator/signature/test_collect_on.rb +45 -0
  231. data/spec/unit/operator/signature/test_initialize.rb +17 -0
  232. data/spec/unit/operator/signature/test_install.rb +56 -0
  233. data/spec/unit/operator/signature/test_option_parser.rb +36 -0
  234. data/spec/unit/operator/signature/test_parse_args.rb +60 -0
  235. data/spec/unit/operator/signature/test_parse_argv.rb +87 -0
  236. data/spec/unit/operator/signature/test_to_lispy.rb +102 -0
  237. data/spec/unit/operator/signature/test_to_shell.rb +103 -0
  238. data/spec/unit/operator/test_non_relational.rb +3 -1
  239. data/spec/unit/relation/test_relops.rb +20 -15
  240. data/spec/unit/sequel/alf.db +0 -0
  241. data/spec/unit/sequel/test_environment.rb +54 -0
  242. data/spec/unit/test_aggregator.rb +32 -22
  243. data/spec/unit/test_environment.rb +5 -0
  244. data/spec/unit/test_lispy.rb +4 -0
  245. data/spec/unit/test_relation.rb +5 -0
  246. data/spec/unit/text/test_cell.rb +6 -6
  247. data/spec/unit/text/test_row.rb +3 -3
  248. data/spec/unit/text/test_table.rb +6 -6
  249. data/spec/unit/tools/test_coalesce.rb +15 -0
  250. data/spec/unit/tools/test_coerce.rb +10 -0
  251. data/spec/unit/tools/test_to_lispy.rb +138 -0
  252. data/spec/unit/tools/test_to_ruby_literal.rb +10 -0
  253. data/spec/unit/tools/test_tuple_handle.rb +1 -59
  254. data/spec/unit/types/test_attr_list.rb +106 -0
  255. data/spec/unit/types/test_attr_name.rb +52 -0
  256. data/spec/unit/{test_heading.rb → types/test_heading.rb} +10 -0
  257. data/spec/unit/types/test_ordering.rb +127 -0
  258. data/spec/unit/types/test_renaming.rb +55 -0
  259. data/spec/unit/types/test_summarization.rb +63 -0
  260. data/spec/unit/types/test_tuple_computation.rb +60 -0
  261. data/spec/unit/types/test_tuple_expression.rb +64 -0
  262. data/spec/unit/types/test_tuple_predicate.rb +79 -0
  263. data/tasks/debug_mail.rake +1 -1
  264. data/tasks/debug_mail.txt +5 -0
  265. data/tasks/gh-pages.rake +63 -0
  266. metadata +325 -52
  267. data/spec/unit/operator/test_command_methods.rb +0 -38
  268. data/spec/unit/tools/test_ordering_key.rb +0 -94
  269. data/spec/unit/tools/test_parse_commandline_args.rb +0 -47
  270. data/spec/unit/tools/test_projection_key.rb +0 -83
@@ -0,0 +1,37 @@
1
+ module Alf
2
+ class Environment
3
+ module Base
4
+
5
+ #
6
+ # Returns a dataset whose name is provided.
7
+ #
8
+ # This method resolves named datasets to tuple enumerables. When the
9
+ # dataset exists, this method must return an Iterator, typically a
10
+ # Reader instance. Otherwise, it must raise a NoSuchDatasetError.
11
+ #
12
+ # @param [Symbol] name the name of a dataset
13
+ # @return [Iterator] an iterator, typically a Reader instance
14
+ # @raise [NoSuchDatasetError] when the dataset does not exist
15
+ #
16
+ def dataset(name)
17
+ end
18
+ undef :dataset
19
+
20
+ #
21
+ # Branches this environment and puts some additional explicit
22
+ # definitions.
23
+ #
24
+ # This method is provided for (with ...) expressions and should not
25
+ # be overridden by subclasses.
26
+ #
27
+ # @param [Hash] defs a set of (name, Iterator) pairs.
28
+ # @return [Environment] an environment instance with new definitions set
29
+ #
30
+ def branch(defs)
31
+ Explicit.new(defs, self)
32
+ end
33
+
34
+ end # module Base
35
+ include(Base)
36
+ end # class Environment
37
+ end # module Alf
@@ -0,0 +1,93 @@
1
+ module Alf
2
+ class Environment
3
+ module ClassMethods
4
+
5
+ #
6
+ # Returns registered environments
7
+ #
8
+ def environments
9
+ @environments ||= []
10
+ end
11
+
12
+ #
13
+ # Register an environment class under a specific name.
14
+ #
15
+ # Registered class must implement a recognizes? method that takes an array
16
+ # of arguments; it must return true if an environment instance can be built
17
+ # using those arguments, false otherwise. Please be very specific in the
18
+ # implementation for returning true. See also autodetect and recognizes?
19
+ #
20
+ # @param [Symbol] name name of the environment kind
21
+ # @param [Class] clazz class that implements the environment
22
+ #
23
+ def register(name, clazz)
24
+ environments << [name, clazz]
25
+ (class << self; self; end).
26
+ send(:define_method, name) do |*args|
27
+ clazz.new(*args)
28
+ end
29
+ end
30
+
31
+ #
32
+ # Auto-detect the environment to use for specific arguments.
33
+ #
34
+ # This method returns an instance of the first registered Environment class
35
+ # that returns true to an invocation of recognizes?(args). It raises an
36
+ # ArgumentError if no such class can be found.
37
+ #
38
+ # @return [Environment] an environment instance
39
+ # @raise [ArgumentError] when no registered class recognizes the arguments
40
+ #
41
+ def autodetect(*args)
42
+ if (args.size == 1) && args.first.is_a?(Environment)
43
+ return args.first
44
+ else
45
+ environments.each do |name,clazz|
46
+ return clazz.new(*args) if clazz.recognizes?(args)
47
+ end
48
+ end
49
+ raise ArgumentError, "Unable to auto-detect Environment with #{args.inspect}"
50
+ end
51
+
52
+ #
53
+ # (see Environment.autodetect)
54
+ #
55
+ def coerce(*args)
56
+ autodetect(*args)
57
+ end
58
+
59
+ #
60
+ # Returns true if _args_ can be used for building an environment instance,
61
+ # false otherwise.
62
+ #
63
+ # When returning true, an immediate invocation of new(*args) should
64
+ # succeed. While runtime exceptions are admitted (no such database, for
65
+ # example), argument errors should not occur (missing argument, wrong
66
+ # typing, etc.).
67
+ #
68
+ # Please be specific in the implementation of this extension point, as
69
+ # registered environments form a chain and each of them should have a
70
+ # chance of being selected.
71
+ #
72
+ def recognizes?(args)
73
+ false
74
+ end
75
+
76
+ #
77
+ # Returns the default environment
78
+ #
79
+ def default
80
+ examples
81
+ end
82
+
83
+ #
84
+ # Returns the examples environment
85
+ #
86
+ def examples
87
+ folder File.expand_path('../../../../examples/operators', __FILE__)
88
+ end
89
+
90
+ end # module ClassMethods
91
+ extend(ClassMethods)
92
+ end # class Environment
93
+ end # module Alf
@@ -0,0 +1,38 @@
1
+ module Alf
2
+ class Environment
3
+ #
4
+ # Specialization of Environment that works with explicitly defined
5
+ # datasources and allows branching and unbranching.
6
+ #
7
+ class Explicit < Environment
8
+
9
+ #
10
+ # Creates a new environment instance with initial definitions
11
+ # and optional child environment.
12
+ #
13
+ def initialize(defs = {}, child = nil)
14
+ @defs = defs
15
+ @child = child
16
+ end
17
+
18
+ #
19
+ # Unbranches this environment and returns its child
20
+ #
21
+ def unbranch
22
+ @child
23
+ end
24
+
25
+ # (see Environment#dataset)
26
+ def dataset(name)
27
+ if @defs.has_key?(name)
28
+ @defs[name]
29
+ elsif @child
30
+ @child.dataset(name)
31
+ else
32
+ raise "No such dataset #{name}"
33
+ end
34
+ end
35
+
36
+ end # class Explicit
37
+ end # class Environment
38
+ end # module Alf
@@ -0,0 +1,62 @@
1
+ module Alf
2
+ class Environment
3
+ #
4
+ # Specialization of Environment to work on files of a given folder.
5
+ #
6
+ # This kind of environment resolves datasets by simply looking at
7
+ # recognized files in a specific folder. "Recognized" files are simply
8
+ # those for which a Reader subclass has been previously registered.
9
+ # This environment then serves reader instances.
10
+ #
11
+ class Folder < Environment
12
+
13
+ #
14
+ # (see Environment.recognizes?)
15
+ #
16
+ # Returns true if args contains only a String which is an existing
17
+ # folder.
18
+ #
19
+ def self.recognizes?(args)
20
+ (args.size == 1) &&
21
+ args.first.is_a?(String) &&
22
+ File.directory?(args.first.to_s)
23
+ end
24
+
25
+ #
26
+ # Creates an environment instance, wired to the specified folder.
27
+ #
28
+ # @param [String] folder path to the folder to use as dataset source
29
+ #
30
+ def initialize(folder)
31
+ @folder = folder
32
+ end
33
+
34
+ # (see Environment#dataset)
35
+ def dataset(name)
36
+ if file = find_file(name)
37
+ Reader.reader(file, self)
38
+ else
39
+ raise "No such dataset #{name} (#{@folder})"
40
+ end
41
+ end
42
+
43
+ protected
44
+
45
+ def find_file(name)
46
+ # TODO: refactor this, because it allows getting out of the folder
47
+ if File.exists?(name.to_s)
48
+ name.to_s
49
+ elsif File.exists?(explicit = File.join(@folder, name.to_s)) &&
50
+ File.file?(explicit)
51
+ explicit
52
+ else
53
+ Dir[File.join(@folder, "#{name}.*")].find do |f|
54
+ File.file?(f)
55
+ end
56
+ end
57
+ end
58
+
59
+ Environment.register(:folder, self)
60
+ end # class Folder
61
+ end # class Environment
62
+ end # module Alf
@@ -0,0 +1,104 @@
1
+ module Alf
2
+ module CSV
3
+
4
+ #
5
+ # Provides common tooling to CSV renderer and readers
6
+ #
7
+ module Commons
8
+
9
+ DEFAULT_OPTIONS = {
10
+ :headers => true
11
+ }
12
+
13
+ private
14
+
15
+ #
16
+ # Returns CSV in ruby 1.9 and FasterCSV for ruby < 1.9.
17
+ # This method handles require as well.
18
+ #
19
+ def get_csv_class
20
+ if RUBY_VERSION >= "1.9"
21
+ require 'csv'
22
+ ::CSV
23
+ else
24
+ ::Alf::Tools::friendly_require('fastercsv')
25
+ ::FasterCSV
26
+ end
27
+ end
28
+
29
+ #
30
+ # Returns a CSV instance bound to a given io and options
31
+ #
32
+ def get_csv(io)
33
+ get_csv_class.new(io, options)
34
+ end
35
+
36
+ end # module Commons
37
+
38
+ #
39
+ # Implements Alf::Renderer contract for outputting CSV files.
40
+ #
41
+ class Renderer < Alf::Renderer
42
+ include CSV::Commons
43
+
44
+ protected
45
+
46
+ # (see Renderer#render)
47
+ def render(input, output)
48
+ csv = get_csv(output)
49
+ header = nil
50
+ input.each do |tuple|
51
+ unless header
52
+ header = extract_header(tuple)
53
+ csv << header.collect{|k| k.to_s}
54
+ end
55
+ csv << extract_row(tuple, header)
56
+ end
57
+ output
58
+ end
59
+
60
+ private
61
+
62
+ def extract_header(tuple)
63
+ tuple.keys
64
+ end
65
+
66
+ def extract_row(tuple, header)
67
+ header.collect{|k| tuple[k]}
68
+ end
69
+
70
+ ::Alf::Renderer.register(:csv, "as a csv file", self)
71
+ end # class Renderer
72
+
73
+ #
74
+ # Implements Alf::Reader contract for reading CSV files.
75
+ #
76
+ class Reader < Alf::Reader
77
+ include CSV::Commons
78
+
79
+ def each
80
+ with_input_io do |io|
81
+ block = Proc.new{|row|
82
+ next if row.header_row?
83
+ yield(symbolize_keys(row.to_hash))
84
+ }
85
+ case io
86
+ when StringIO
87
+ get_csv_class.parse(io.string, options, &block)
88
+ else
89
+ get_csv(io).each(&block)
90
+ end
91
+ end
92
+ end
93
+
94
+ private
95
+
96
+ def symbolize_keys(h)
97
+ Hash[h.collect{|k,v| [k.to_sym,v] }]
98
+ end
99
+
100
+ ::Alf::Reader.register(:csv, [".csv"], self)
101
+ end # class Reader
102
+
103
+ end # module CSV
104
+ end # module Alf
@@ -0,0 +1,100 @@
1
+ module Alf
2
+ module Logs
3
+
4
+ #
5
+ # Implements Alf::Reader contract for reading log files.
6
+ #
7
+ class Reader < Alf::Reader
8
+
9
+ DEFAULT_OPTIONS = {
10
+ :file_format => nil,
11
+ :line_def => :access
12
+ }
13
+
14
+ attr_reader :options
15
+
16
+ def initialize(*args)
17
+ Alf::Tools::friendly_require('request_log_analyzer')
18
+ super(*args)
19
+ @options[:file_format] = coerce_file_format(@options[:file_format])
20
+ end
21
+
22
+ def each
23
+ parser = infer_parser(input_path)
24
+ with_input_io do |io|
25
+ parser.parse_stream(io) do |req|
26
+ yield request_to_tuple(req)
27
+ end
28
+ end
29
+ end
30
+
31
+ private
32
+
33
+ def coerce_file_format(file_format)
34
+ case file_format
35
+ when NilClass
36
+ nil
37
+ when RequestLogAnalyzer::FileFormat
38
+ file_format
39
+ when Symbol
40
+ RequestLogAnalyzer::FileFormat.load(file_format)
41
+ when Array
42
+ RequestLogAnalyzer::FileFormat.load(*file_format)
43
+ else
44
+ raise ArgumentError, "Invalid file format: #{file_format}"
45
+ end
46
+ end
47
+
48
+ def infer_parser(path)
49
+ file_format = @options[:file_format] || begin
50
+ unless path
51
+ raise NotImplementedError, "Logs::Reader does not work on IO for now"
52
+ end
53
+ RequestLogAnalyzer::FileFormat.autodetect(path)
54
+ end
55
+ RequestLogAnalyzer::Source::LogParser.new(file_format)
56
+ end
57
+
58
+ def request_to_tuple(req)
59
+ req.attributes
60
+ end
61
+
62
+ LOG_TYPES = {
63
+ # RequestLogAnalyzer::Request::Converters
64
+ :string => String,
65
+ :float => Float,
66
+ :decimal => Float,
67
+ :int => Integer,
68
+ :integer => Integer,
69
+ :sym => Symbol,
70
+ :symbol => Symbol,
71
+ :timestamp => Integer,
72
+ :traffic => Integer,
73
+ :duration => Float,
74
+ :epoch => Integer,
75
+ # AmazonS3
76
+ :nillable_string => String,
77
+ :referer => String,
78
+ :user_agent => String,
79
+ # Apache
80
+ :path => String,
81
+ # MySQL
82
+ :sql => String
83
+ }
84
+
85
+ def infer_heading(format, line_def = :access)
86
+ h = Hash[format.line_definitions[line_def].captures.collect{|capt|
87
+ [ capt[:name], to_type(capt[:type]) ]
88
+ }]
89
+ Alf::Heading.new(h)
90
+ end
91
+
92
+ def to_type(log_type)
93
+ LOG_TYPES[log_type] || String
94
+ end
95
+
96
+ ::Alf::Reader.register(:logs, [".log"], self)
97
+ end # class Reader
98
+
99
+ end # module Logs
100
+ end # module Alf
@@ -0,0 +1,77 @@
1
+ module Alf
2
+ module Sequel
3
+
4
+ #
5
+ # Specialization of Alf::Environment to distribute Sequel datasets
6
+ #
7
+ class Environment < ::Alf::Environment
8
+
9
+ #
10
+ # (see Alf::Environment.recognizes?)
11
+ #
12
+ # Returns true if args contains one String that can be interpreted as
13
+ # a valid database uri.
14
+ #
15
+ def self.recognizes?(args)
16
+ require 'uri'
17
+ return false unless (args.size == 1) && args.first.is_a?(String)
18
+ uri = URI::parse(args.first)
19
+ if uri.scheme || looks_a_sqlite_file?(args.first)
20
+ true
21
+ else
22
+ false
23
+ end
24
+ rescue ::URI::Error
25
+ false
26
+ end
27
+
28
+ def self.looks_a_sqlite_file?(f)
29
+ (File.file?(f) && File.extname(f).==(".db"))
30
+ end
31
+
32
+ # Creates an Environment instance
33
+ def initialize(uri, options = {})
34
+ @uri = self.class.looks_a_sqlite_file?(uri) ? "sqlite://#{uri}" : uri
35
+ @options = options
36
+ end
37
+
38
+ #
39
+ # (see Alf::Environment#dataset)
40
+ #
41
+ def dataset(name)
42
+ Iterator.new(connect[name])
43
+ end
44
+
45
+ private
46
+
47
+ # Creates a database connection
48
+ def connect
49
+ Alf::Tools::friendly_require('sequel')
50
+ @db ||= ::Sequel.connect(@uri, @options)
51
+ end
52
+
53
+ ::Alf::Environment.register(:sequel, self)
54
+ end # class Environment
55
+
56
+ # Specialization of Alf::Iterator to work on a Sequel dataset
57
+ class Iterator
58
+ include ::Alf::Iterator
59
+
60
+ def initialize(dataset)
61
+ @dataset = dataset
62
+ end
63
+
64
+ # (see Alf::Iterator#each)
65
+ def each
66
+ @dataset.each(&Proc.new)
67
+ end
68
+
69
+ # (see Alf::Iterator#pipe)
70
+ def pipe(input, env = nil)
71
+ self
72
+ end
73
+
74
+ end # class Iterator
75
+
76
+ end # module Sequel
77
+ end # module Alf
File without changes
data/lib/alf/extra.rb ADDED
@@ -0,0 +1,5 @@
1
+ require 'alf/extra/yaml'
2
+ require 'alf/extra/csv'
3
+ require 'alf/extra/logs'
4
+ require 'alf/extra/sequel'
5
+
@@ -0,0 +1,38 @@
1
+ module Alf
2
+ module Iterator
3
+ module Base
4
+
5
+ #
6
+ # Wire the iterator input and an optional execution environment.
7
+ #
8
+ # Iterators (typically Reader and Operator instances) work from input data
9
+ # that come from files, or other operators, and so on. This method wires
10
+ # this input data to the iterator. Wiring is required before any attempt
11
+ # to call each, unless autowiring occurs at construction. The exact kind of
12
+ # input object is left at discretion of Iterator implementations.
13
+ #
14
+ # @param [Object] input the iterator input, at discretion of the Iterator
15
+ # implementation.
16
+ # @param [Environment] environment an optional environment for resolving
17
+ # named datasets if needed.
18
+ # @return [Object] self
19
+ #
20
+ def pipe(input, environment = nil)
21
+ self
22
+ end
23
+ undef :pipe
24
+
25
+ #
26
+ # Converts this iterator to an in-memory Relation.
27
+ #
28
+ # @return [Relation] a relation instance, as the set of tuples
29
+ # that would be yielded by this iterator.
30
+ #
31
+ def to_rel
32
+ Relation::coerce(self)
33
+ end
34
+
35
+ end # module Base
36
+ include(Base)
37
+ end # module Iterator
38
+ end # module Alf
@@ -0,0 +1,22 @@
1
+ module Alf
2
+ module Iterator
3
+ module ClassMethods
4
+
5
+ #
6
+ # Coerces something to an iterator
7
+ #
8
+ def coerce(arg, environment = nil)
9
+ case arg
10
+ when Iterator, Array
11
+ arg
12
+ when String, Symbol
13
+ Proxy.new(environment, arg.to_sym)
14
+ else
15
+ Reader.coerce(arg, environment)
16
+ end
17
+ end
18
+
19
+ end # module ClassMethods
20
+ extend(ClassMethods)
21
+ end # module Iterator
22
+ end # module Alf
@@ -0,0 +1,33 @@
1
+ module Alf
2
+ module Iterator
3
+ class Proxy
4
+ include Iterator
5
+
6
+ # @return [Symbol] name of the dataset to request from the environment
7
+ attr_reader :dataset
8
+
9
+ #
10
+ # Creates a proxy instance.
11
+ #
12
+ # @param [Environment] env the environment serving iterator instances
13
+ # @param [Symbol] dataset named dataset to rely on
14
+ #
15
+ def initialize(env, dataset)
16
+ @environment, @dataset = env, dataset
17
+ end
18
+
19
+ # (see Iterator#pipe)
20
+ def pipe(input, environment = nil)
21
+ @environment ||= environment
22
+ @dataset ||= input
23
+ self
24
+ end
25
+
26
+ # (see Iterator#each)
27
+ def each(&block)
28
+ @environment.dataset(@dataset).each(&block)
29
+ end
30
+
31
+ end # class Proxy
32
+ end # module Iterator
33
+ end # module Alf