alf 0.9.3 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.md +255 -129
- data/Gemfile +31 -1
- data/Gemfile.lock +17 -20
- data/LICENCE.md +1 -1
- data/Manifest.txt +2 -0
- data/README.md +37 -43
- data/TODO.md +1 -1
- data/alf.gemspec +10 -7
- data/alf.noespec +24 -13
- data/bin/alf +2 -2
- data/doc/commands/exec.md +16 -0
- data/doc/commands/help.md +11 -0
- data/doc/commands/main.md +33 -0
- data/doc/commands/show.md +19 -0
- data/doc/operators/non_relational/autonum.md +23 -0
- data/doc/operators/non_relational/clip.md +31 -0
- data/doc/operators/non_relational/coerce.md +15 -0
- data/doc/operators/non_relational/compact.md +20 -0
- data/doc/operators/non_relational/defaults.md +32 -0
- data/doc/operators/non_relational/generator.md +20 -0
- data/doc/operators/non_relational/sort.md +24 -0
- data/doc/operators/relational/extend.md +18 -0
- data/doc/operators/relational/group.md +27 -0
- data/doc/operators/relational/intersect.md +13 -0
- data/doc/operators/relational/join.md +27 -0
- data/doc/operators/relational/matching.md +20 -0
- data/doc/operators/relational/minus.md +12 -0
- data/doc/operators/relational/not-matching.md +20 -0
- data/doc/operators/relational/project.md +28 -0
- data/doc/operators/relational/quota.md +21 -0
- data/doc/operators/relational/rank.md +27 -0
- data/doc/operators/relational/rename.md +17 -0
- data/doc/operators/relational/restrict.md +25 -0
- data/doc/operators/relational/summarize.md +25 -0
- data/doc/operators/relational/ungroup.md +20 -0
- data/doc/operators/relational/union.md +14 -0
- data/doc/operators/relational/unwrap.md +20 -0
- data/doc/operators/relational/wrap.md +24 -0
- data/examples/csv/suppliers.csv +6 -0
- data/examples/logs/access.log +1000 -0
- data/examples/logs/combined.alf +2 -0
- data/examples/logs/hits.alf +14 -0
- data/examples/logs/not_found.alf +7 -0
- data/examples/logs/robots-cheating.alf +11 -0
- data/examples/logs/robots.alf +8 -0
- data/examples/northwind/customers.csv +92 -0
- data/examples/northwind/northwind.db +0 -0
- data/examples/northwind/orders.csv +831 -0
- data/examples/operators/clip.alf +1 -1
- data/examples/operators/database.alf +5 -6
- data/examples/operators/defaults.alf +1 -1
- data/examples/operators/group.alf +1 -1
- data/examples/operators/project.alf +2 -1
- data/examples/operators/pseudo-with.alf +2 -2
- data/examples/operators/quota.alf +2 -2
- data/examples/operators/summarize.alf +2 -2
- data/lib/alf/aggregator/aggregators.rb +77 -0
- data/lib/alf/aggregator/base.rb +95 -0
- data/lib/alf/aggregator/class_methods.rb +57 -0
- data/lib/alf/buffer/sorted.rb +48 -0
- data/lib/alf/command/class_methods.rb +27 -0
- data/lib/alf/command/doc_manager.rb +72 -0
- data/lib/alf/command/exec.rb +12 -0
- data/lib/alf/command/help.rb +31 -0
- data/lib/alf/command/main.rb +146 -0
- data/lib/alf/command/show.rb +33 -0
- data/lib/alf/environment/base.rb +37 -0
- data/lib/alf/environment/class_methods.rb +93 -0
- data/lib/alf/environment/explicit.rb +38 -0
- data/lib/alf/environment/folder.rb +62 -0
- data/lib/alf/extra/csv.rb +104 -0
- data/lib/alf/extra/logs.rb +100 -0
- data/lib/alf/extra/sequel.rb +77 -0
- data/lib/alf/{yaml.rb → extra/yaml.rb} +0 -0
- data/lib/alf/extra.rb +5 -0
- data/lib/alf/iterator/base.rb +38 -0
- data/lib/alf/iterator/class_methods.rb +22 -0
- data/lib/alf/iterator/proxy.rb +33 -0
- data/lib/alf/lispy/instance_methods.rb +157 -0
- data/lib/alf/operator/base.rb +74 -0
- data/lib/alf/operator/binary.rb +32 -0
- data/lib/alf/operator/cesure.rb +45 -0
- data/lib/alf/operator/class_methods.rb +132 -0
- data/lib/alf/operator/experimental.rb +9 -0
- data/lib/alf/operator/non_relational/autonum.rb +24 -0
- data/lib/alf/operator/non_relational/clip.rb +20 -0
- data/lib/alf/operator/non_relational/coerce.rb +21 -0
- data/lib/alf/operator/non_relational/compact.rb +62 -0
- data/lib/alf/operator/non_relational/defaults.rb +25 -0
- data/lib/alf/operator/non_relational/generator.rb +38 -0
- data/lib/alf/operator/non_relational/sort.rb +23 -0
- data/lib/alf/operator/nullary.rb +20 -0
- data/lib/alf/operator/relational/extend.rb +24 -0
- data/lib/alf/operator/relational/group.rb +32 -0
- data/lib/alf/operator/relational/intersect.rb +37 -0
- data/lib/alf/operator/relational/join.rb +106 -0
- data/lib/alf/operator/relational/matching.rb +45 -0
- data/lib/alf/operator/relational/minus.rb +37 -0
- data/lib/alf/operator/relational/not_matching.rb +45 -0
- data/lib/alf/operator/relational/project.rb +22 -0
- data/lib/alf/operator/relational/quota.rb +51 -0
- data/lib/alf/operator/relational/rank.rb +55 -0
- data/lib/alf/operator/relational/rename.rb +19 -0
- data/lib/alf/operator/relational/restrict.rb +20 -0
- data/lib/alf/operator/relational/summarize.rb +83 -0
- data/lib/alf/operator/relational/ungroup.rb +25 -0
- data/lib/alf/operator/relational/union.rb +32 -0
- data/lib/alf/operator/relational/unwrap.rb +21 -0
- data/lib/alf/operator/relational/wrap.rb +22 -0
- data/lib/alf/operator/shortcut.rb +53 -0
- data/lib/alf/operator/signature.rb +262 -0
- data/lib/alf/operator/transform.rb +27 -0
- data/lib/alf/operator/unary.rb +38 -0
- data/lib/alf/reader/alf_file.rb +24 -0
- data/lib/alf/reader/base.rb +119 -0
- data/lib/alf/reader/class_methods.rb +82 -0
- data/lib/alf/reader/rash.rb +28 -0
- data/lib/alf/relation/class_methods.rb +37 -0
- data/lib/alf/relation/instance_methods.rb +127 -0
- data/lib/alf/renderer/base.rb +72 -0
- data/lib/alf/renderer/class_methods.rb +58 -0
- data/lib/alf/renderer/rash.rb +19 -0
- data/lib/alf/{text.rb → renderer/text.rb} +1 -1
- data/lib/alf/tools/coerce.rb +14 -0
- data/lib/alf/tools/miscellaneous.rb +77 -0
- data/lib/alf/tools/to_lispy.rb +99 -0
- data/lib/alf/tools/to_ruby_literal.rb +14 -0
- data/lib/alf/tools/tuple_handle.rb +50 -0
- data/lib/alf/types/attr_list.rb +56 -0
- data/lib/alf/types/attr_name.rb +28 -0
- data/lib/alf/types/boolean.rb +12 -0
- data/lib/alf/types/heading.rb +96 -0
- data/lib/alf/types/ordering.rb +93 -0
- data/lib/alf/types/renaming.rb +57 -0
- data/lib/alf/types/summarization.rb +76 -0
- data/lib/alf/types/tuple_computation.rb +61 -0
- data/lib/alf/types/tuple_expression.rb +61 -0
- data/lib/alf/types/tuple_predicate.rb +49 -0
- data/lib/alf/version.rb +2 -2
- data/lib/alf.rb +193 -3714
- data/spec/integration/__database__/group.alf +1 -1
- data/spec/integration/__database__/suppliers_csv.csv +6 -0
- data/spec/integration/command/alf/alf.db +0 -0
- data/spec/integration/command/alf/alf_env_sqlite.cmd +1 -0
- data/spec/integration/command/alf/alf_env_sqlite.stdout +9 -0
- data/spec/integration/command/alf/alf_help.cmd +1 -0
- data/spec/integration/command/alf/alf_help.stdout +67 -0
- data/spec/integration/command/autonum/autonum_0.cmd +1 -1
- data/spec/integration/command/coerce/coerce_1.cmd +1 -0
- data/spec/integration/command/coerce/coerce_1.stdout +5 -0
- data/spec/integration/command/defaults/defaults_0.cmd +1 -1
- data/spec/integration/command/defaults/defaults_0.stdout +9 -9
- data/spec/integration/command/defaults/defaults_2.cmd +1 -0
- data/spec/integration/command/defaults/defaults_2.stdout +9 -0
- data/spec/integration/command/generator/generator_1.cmd +1 -0
- data/spec/integration/command/generator/generator_1.stdout +10 -0
- data/spec/integration/command/generator/generator_2.cmd +1 -0
- data/spec/integration/command/generator/generator_2.stdout +5 -0
- data/spec/integration/command/generator/generator_3.cmd +1 -0
- data/spec/integration/command/generator/generator_3.stdout +5 -0
- data/spec/integration/command/group/group_0.cmd +1 -1
- data/spec/integration/command/group/group_1.cmd +1 -1
- data/spec/integration/command/help/help_1.cmd +1 -0
- data/spec/integration/command/help/help_1.stdout +22 -0
- data/spec/integration/command/quota/quota_0.cmd +1 -1
- data/spec/integration/command/rank/rank_1.cmd +1 -1
- data/spec/integration/command/rank/rank_1.stdout +10 -10
- data/spec/integration/command/rank/rank_2.cmd +1 -1
- data/spec/integration/command/rank/rank_2.stdout +10 -10
- data/spec/integration/command/rank/rank_3.cmd +1 -1
- data/spec/integration/command/rank/rank_3.stdout +10 -10
- data/spec/integration/command/rank/rank_4.cmd +1 -1
- data/spec/integration/command/rank/rank_5.cmd +1 -1
- data/spec/integration/command/show/show_csv.cmd +1 -0
- data/spec/integration/command/show/show_csv.stdout +6 -0
- data/spec/integration/command/show/show_rash_2.cmd +1 -1
- data/spec/integration/command/show/show_rash_2.stdout +5 -5
- data/spec/integration/command/sort/sort_0.cmd +1 -1
- data/spec/integration/command/sort/sort_1.cmd +1 -1
- data/spec/integration/command/sort/sort_1.stdout +2 -2
- data/spec/integration/command/sort/sort_2.cmd +1 -0
- data/spec/integration/command/sort/sort_2.stdout +9 -0
- data/spec/integration/command/sort/sort_3.cmd +1 -0
- data/spec/integration/command/sort/sort_3.stdout +9 -0
- data/spec/integration/command/summarize/summarize_0.cmd +1 -1
- data/spec/integration/command/ungroup/ungroup_0.cmd +1 -1
- data/spec/integration/command/wrap/wrap_0.cmd +1 -1
- data/spec/integration/semantics/test_project.alf +5 -6
- data/spec/integration/semantics/test_rank.alf +16 -16
- data/spec/integration/test_command.rb +17 -6
- data/spec/integration/test_examples.rb +1 -1
- data/spec/regression/logs/apache_combined.log +5 -0
- data/spec/regression/logs/test_path_attribute.rb +25 -0
- data/spec/regression/relation/test_relation_allbut_all.rb +14 -0
- data/spec/shared/an_operator_class.rb +10 -5
- data/spec/spec_helper.rb +1 -7
- data/spec/unit/assumptions/test_set.rb +64 -0
- data/spec/unit/command/doc_manager/dynamic.md +1 -0
- data/spec/unit/command/doc_manager/example.md +1 -0
- data/spec/unit/command/doc_manager/example_1.txt +11 -0
- data/spec/unit/command/doc_manager/static.md +1 -0
- data/spec/unit/command/doc_manager/test_call.rb +49 -0
- data/spec/unit/csv/input.csv +3 -0
- data/spec/unit/csv/test_reader.rb +66 -0
- data/spec/unit/csv/test_renderer.rb +73 -0
- data/spec/unit/lispy/test_relation.rb +37 -0
- data/spec/unit/lispy/test_run.rb +40 -0
- data/spec/unit/lispy/test_tuple.rb +36 -0
- data/spec/unit/logs/apache_combined.log +5 -0
- data/spec/unit/logs/postgresql.log +29 -0
- data/spec/unit/logs/test_reader.rb +56 -0
- data/spec/unit/operator/non_relational/compact/{buffer_based.rb → test_buffer_based.rb} +0 -0
- data/spec/unit/operator/non_relational/test_clip.rb +1 -1
- data/spec/unit/operator/non_relational/test_coerce.rb +35 -0
- data/spec/unit/operator/non_relational/test_defaults.rb +15 -2
- data/spec/unit/operator/non_relational/test_generator.rb +78 -0
- data/spec/unit/operator/relational/join/test_hash_based.rb +4 -4
- data/spec/unit/operator/relational/matching/test_hash_based.rb +6 -6
- data/spec/unit/operator/relational/not_matching/test_hash_based.rb +4 -4
- data/spec/unit/operator/relational/summarize/test_hash_based.rb +10 -6
- data/spec/unit/operator/relational/summarize/test_sort_based.rb +18 -7
- data/spec/unit/operator/relational/test_group.rb +8 -8
- data/spec/unit/operator/relational/test_intersect.rb +3 -3
- data/spec/unit/operator/relational/test_minus.rb +3 -3
- data/spec/unit/operator/relational/test_project.rb +12 -2
- data/spec/unit/operator/relational/test_quota.rb +5 -6
- data/spec/unit/operator/relational/test_summarize.rb +9 -11
- data/spec/unit/operator/relational/test_union.rb +1 -1
- data/spec/unit/operator/relational/test_wrap.rb +1 -1
- data/spec/unit/operator/signature/test_collect_on.rb +45 -0
- data/spec/unit/operator/signature/test_initialize.rb +17 -0
- data/spec/unit/operator/signature/test_install.rb +56 -0
- data/spec/unit/operator/signature/test_option_parser.rb +36 -0
- data/spec/unit/operator/signature/test_parse_args.rb +60 -0
- data/spec/unit/operator/signature/test_parse_argv.rb +87 -0
- data/spec/unit/operator/signature/test_to_lispy.rb +102 -0
- data/spec/unit/operator/signature/test_to_shell.rb +103 -0
- data/spec/unit/operator/test_non_relational.rb +3 -1
- data/spec/unit/relation/test_relops.rb +20 -15
- data/spec/unit/sequel/alf.db +0 -0
- data/spec/unit/sequel/test_environment.rb +54 -0
- data/spec/unit/test_aggregator.rb +32 -22
- data/spec/unit/test_environment.rb +5 -0
- data/spec/unit/test_lispy.rb +4 -0
- data/spec/unit/test_relation.rb +5 -0
- data/spec/unit/text/test_cell.rb +6 -6
- data/spec/unit/text/test_row.rb +3 -3
- data/spec/unit/text/test_table.rb +6 -6
- data/spec/unit/tools/test_coalesce.rb +15 -0
- data/spec/unit/tools/test_coerce.rb +10 -0
- data/spec/unit/tools/test_to_lispy.rb +138 -0
- data/spec/unit/tools/test_to_ruby_literal.rb +10 -0
- data/spec/unit/tools/test_tuple_handle.rb +1 -59
- data/spec/unit/types/test_attr_list.rb +106 -0
- data/spec/unit/types/test_attr_name.rb +52 -0
- data/spec/unit/{test_heading.rb → types/test_heading.rb} +10 -0
- data/spec/unit/types/test_ordering.rb +127 -0
- data/spec/unit/types/test_renaming.rb +55 -0
- data/spec/unit/types/test_summarization.rb +63 -0
- data/spec/unit/types/test_tuple_computation.rb +60 -0
- data/spec/unit/types/test_tuple_expression.rb +64 -0
- data/spec/unit/types/test_tuple_predicate.rb +79 -0
- data/tasks/debug_mail.rake +1 -1
- data/tasks/debug_mail.txt +5 -0
- data/tasks/gh-pages.rake +63 -0
- metadata +325 -52
- data/spec/unit/operator/test_command_methods.rb +0 -38
- data/spec/unit/tools/test_ordering_key.rb +0 -94
- data/spec/unit/tools/test_parse_commandline_args.rb +0 -47
- data/spec/unit/tools/test_projection_key.rb +0 -83
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
module Alf
  class Environment
    module Base

      #
      # Resolves a named dataset to an enumerable of tuples.
      #
      # This is an abstract contract: the empty stub below is undefined right
      # after being declared, so it only serves as an anchor for this
      # documentation. Concrete environments have to supply their own
      # implementation, which is expected to return an Iterator (typically a
      # Reader instance) when the dataset exists and to raise a
      # NoSuchDatasetError otherwise.
      #
      # @param [Symbol] name the name of a dataset
      # @return [Iterator] an iterator, typically a Reader instance
      # @raise [NoSuchDatasetError] when the dataset does not exist
      #
      def dataset(name); end
      undef_method :dataset

      #
      # Branches this environment, layering additional explicit definitions
      # on top of it.
      #
      # The result is an Explicit environment built from `defs` and from this
      # environment. This method backs (with ...) expressions and should not
      # be overridden by subclasses.
      #
      # @param [Hash] defs a set of (name, Iterator) pairs.
      # @return [Environment] an environment instance with new definitions set
      #
      def branch(defs)
        Explicit.new(defs, self)
      end

    end # module Base
    include(Base)
  end # class Environment
end # module Alf
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
module Alf
  class Environment
    module ClassMethods

      #
      # Returns the registered environments, as an array of [name, class]
      # pairs in registration order. The array is created lazily.
      #
      def environments
        @environments ||= []
      end

      #
      # Registers an environment class under a specific name.
      #
      # Besides recording the pair, this installs a singleton factory method
      # called `name` on the receiver; that factory forwards its arguments to
      # clazz.new.
      #
      # The registered class must implement a recognizes? method that takes an
      # array of arguments; it must return true if an environment instance can
      # be built using those arguments, false otherwise. Please be very
      # specific in the implementation for returning true. See also autodetect
      # and recognizes?
      #
      # @param [Symbol] name name of the environment kind
      # @param [Class] clazz class that implements the environment
      #
      def register(name, clazz)
        environments << [name, clazz]
        singleton = class << self; self; end
        singleton.send(:define_method, name) { |*args| clazz.new(*args) }
      end

      #
      # Auto-detects the environment to use for specific arguments.
      #
      # When the only argument already is an Environment, it is returned as
      # is. Otherwise, this method returns an instance of the first registered
      # Environment class that answers true to recognizes?(args).
      #
      # @return [Environment] an environment instance
      # @raise [ArgumentError] when no registered class recognizes the arguments
      #
      def autodetect(*args)
        return args.first if (args.size == 1) && args.first.is_a?(Environment)

        # Registration order matters: the first class accepting args wins.
        match = environments.find { |_name, clazz| clazz.recognizes?(args) }
        return match.last.new(*args) if match

        raise ArgumentError, "Unable to auto-detect Environment with #{args.inspect}"
      end

      #
      # (see Environment.autodetect)
      #
      def coerce(*args)
        autodetect(*args)
      end

      #
      # Returns true if _args_ can be used for building an environment
      # instance, false otherwise. This default implementation always
      # answers false.
      #
      # When returning true, an immediate invocation of new(*args) should
      # succeed. While runtime exceptions are admitted (no such database, for
      # example), argument errors should not occur (missing argument, wrong
      # typing, etc.).
      #
      # Please be specific in the implementation of this extension point, as
      # registered environments form a chain and each of them should have a
      # chance of being selected.
      #
      def recognizes?(args)
        false
      end

      #
      # Returns the default environment, that is, the examples one.
      #
      def default
        examples
      end

      #
      # Returns the examples environment, built through the `folder` factory
      # (which only exists once an environment has been registered under
      # that name).
      #
      def examples
        folder(File.expand_path('../../../../examples/operators', __FILE__))
      end

    end # module ClassMethods
    extend(ClassMethods)
  end # class Environment
end # module Alf
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
module Alf
  class Environment
    #
    # Specialization of Environment that works with explicitly defined
    # datasources and allows branching and unbranching.
    #
    class Explicit < Environment

      #
      # Creates a new environment instance with initial definitions
      # and an optional child environment, used as a fallback when a
      # dataset is not found among the definitions.
      #
      # @param [Hash] defs dataset definitions, looked up by name
      # @param [Environment] child optional environment to delegate to
      #
      def initialize(defs = {}, child = nil)
        @defs  = defs
        @child = child
      end

      #
      # Unbranches this environment and returns its child (nil when no
      # child was given at construction).
      #
      def unbranch
        @child
      end

      #
      # (see Environment#dataset)
      #
      # Local definitions take precedence; unknown names are delegated to the
      # child environment when there is one. Otherwise a RuntimeError is
      # raised.
      #
      def dataset(name)
        return @defs[name] if @defs.has_key?(name)
        return @child.dataset(name) if @child

        raise "No such dataset #{name}"
      end

    end # class Explicit
  end # class Environment
end # module Alf
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
module Alf
  class Environment
    #
    # Specialization of Environment to work on files of a given folder.
    #
    # This kind of environment resolves datasets by simply looking at
    # recognized files in a specific folder. "Recognized" files are simply
    # those for which a Reader subclass has been previously registered.
    # This environment then serves reader instances.
    #
    class Folder < Environment

      #
      # (see Environment.recognizes?)
      #
      # Returns true if args contains only a String which is an existing
      # folder.
      #
      def self.recognizes?(args)
        (args.size == 1) &&
        args.first.is_a?(String) &&
        File.directory?(args.first.to_s)
      end

      #
      # Creates an environment instance, wired to the specified folder.
      #
      # @param [String] folder path to the folder to use as dataset source
      #
      def initialize(folder)
        @folder = folder
      end

      #
      # (see Environment#dataset)
      #
      # Serves a reader on the file resolved by find_file, and raises a
      # RuntimeError when no file can be found for `name`.
      #
      def dataset(name)
        if file = find_file(name)
          Reader.reader(file, self)
        else
          raise "No such dataset #{name} (#{@folder})"
        end
      end

      protected

      #
      # Resolves a dataset name to a file path, trying in order:
      #
      # 1. `name` itself, as a path that exists on the file system;
      # 2. a regular file called `name` inside the folder;
      # 3. the first regular file matching "name.*" inside the folder.
      #
      # @param [#to_s] name a dataset name or a file path
      # @return [String, nil] the path found, or nil when nothing matches
      #
      def find_file(name)
        path = name.to_s
        # TODO: refactor this, because it allows getting out of the folder
        #
        # File.exist? is used on purpose: the File.exists? alias has been
        # deprecated for long and is removed as of Ruby 3.2.
        if File.exist?(path)
          path
        elsif File.file?(explicit = File.join(@folder, path))
          # File.file? already answers false for missing paths, so no
          # separate existence check is needed.
          explicit
        else
          Dir[File.join(@folder, "#{name}.*")].find do |f|
            File.file?(f)
          end
        end
      end

      Environment.register(:folder, self)
    end # class Folder
  end # class Environment
end # module Alf
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
module Alf
  module CSV

    #
    # Tooling shared by the CSV renderer and reader.
    #
    # The helpers below rely on an `options` method which is not defined
    # here; it has to be supplied by the including class.
    #
    module Commons

      DEFAULT_OPTIONS = {
        :headers => true
      }

      private

      #
      # Returns FasterCSV for ruby < 1.9 and the standard CSV class
      # otherwise. The required library is loaded on the fly.
      #
      def get_csv_class
        if RUBY_VERSION < "1.9"
          ::Alf::Tools::friendly_require('fastercsv')
          ::FasterCSV
        else
          require 'csv'
          ::CSV
        end
      end

      #
      # Returns a CSV instance bound to a given io, configured with the
      # current options.
      #
      def get_csv(io)
        csv_class = get_csv_class
        csv_class.new(io, options)
      end

    end # module Commons

    #
    # Implements Alf::Renderer contract for outputting CSV files.
    #
    class Renderer < Alf::Renderer
      include CSV::Commons

      protected

      #
      # (see Renderer#render)
      #
      # The header line is taken from the keys of the first tuple and written
      # once; every tuple is then written following that same key order.
      #
      def render(input, output)
        csv    = get_csv(output)
        header = nil
        input.each do |tuple|
          if header.nil?
            header = extract_header(tuple)
            csv << header.map { |k| k.to_s }
          end
          csv << extract_row(tuple, header)
        end
        output
      end

      private

      # Returns the attribute names of a tuple, i.e. its keys
      def extract_header(tuple)
        tuple.keys
      end

      # Returns the values of a tuple, ordered as in header
      def extract_row(tuple, header)
        header.map { |k| tuple[k] }
      end

      ::Alf::Renderer.register(:csv, "as a csv file", self)
    end # class Renderer

    #
    # Implements Alf::Reader contract for reading CSV files.
    #
    class Reader < Alf::Reader
      include CSV::Commons

      #
      # Yields each CSV row as a tuple with symbolized keys; header rows
      # are skipped.
      #
      def each
        with_input_io do |io|
          handler = Proc.new { |row|
            yield(symbolize_keys(row.to_hash)) unless row.header_row?
          }
          # A StringIO is parsed from its underlying string; any other io
          # is wrapped in a CSV instance.
          case io
          when StringIO then get_csv_class.parse(io.string, options, &handler)
          else get_csv(io).each(&handler)
          end
        end
      end

      private

      # Returns a copy of h whose keys have been converted to symbols
      def symbolize_keys(h)
        h.inject({}) do |tuple, (k, v)|
          tuple[k.to_sym] = v
          tuple
        end
      end

      ::Alf::Reader.register(:csv, [".csv"], self)
    end # class Reader

  end # module CSV
end # module Alf
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
module Alf
  module Logs

    #
    # Implements Alf::Reader contract for reading log files, relying on the
    # request_log_analyzer library for the parsing itself.
    #
    class Reader < Alf::Reader

      DEFAULT_OPTIONS = {
        :file_format => nil,
        :line_def    => :access
      }

      attr_reader :options

      #
      # Creates a reader instance. The request_log_analyzer library is loaded
      # first; the :file_format option is then normalized through
      # coerce_file_format.
      #
      def initialize(*args)
        Alf::Tools::friendly_require('request_log_analyzer')
        super(*args)
        raw_format = @options[:file_format]
        @options[:file_format] = coerce_file_format(raw_format)
      end

      #
      # Parses the input and yields one tuple per parsed request.
      #
      def each
        parser = infer_parser(input_path)
        with_input_io do |io|
          parser.parse_stream(io) { |req| yield request_to_tuple(req) }
        end
      end

      private

      #
      # Normalizes a file format specification: nil and FileFormat values
      # are kept as is, a Symbol or an Array (splatted) is given to
      # FileFormat.load.
      #
      # @raise [ArgumentError] for any other kind of value
      #
      def coerce_file_format(file_format)
        case file_format
        when nil                            then nil
        when RequestLogAnalyzer::FileFormat then file_format
        when Symbol then RequestLogAnalyzer::FileFormat.load(file_format)
        when Array  then RequestLogAnalyzer::FileFormat.load(*file_format)
        else
          raise ArgumentError, "Invalid file format: #{file_format}"
        end
      end

      #
      # Builds a log parser, using the configured file format or, when none
      # is set, the one auto-detected from path.
      #
      # @raise [NotImplementedError] if no format is set and no path is known
      #
      def infer_parser(path)
        file_format = @options[:file_format]
        unless file_format
          unless path
            raise NotImplementedError, "Logs::Reader does not work on IO for now"
          end
          file_format = RequestLogAnalyzer::FileFormat.autodetect(path)
        end
        RequestLogAnalyzer::Source::LogParser.new(file_format)
      end

      # Converts a parsed request to a tuple, namely its attributes
      def request_to_tuple(req)
        req.attributes
      end

      # Ruby classes associated to the capture types found in line definitions
      LOG_TYPES = {
        # RequestLogAnalyzer::Request::Converters
        :string    => String,
        :float     => Float,
        :decimal   => Float,
        :int       => Integer,
        :integer   => Integer,
        :sym       => Symbol,
        :symbol    => Symbol,
        :timestamp => Integer,
        :traffic   => Integer,
        :duration  => Float,
        :epoch     => Integer,
        # AmazonS3
        :nillable_string => String,
        :referer         => String,
        :user_agent      => String,
        # Apache
        :path => String,
        # MySQL
        :sql => String
      }

      #
      # Infers a heading from the captures of one line definition of format,
      # mapping each capture name to the class given by to_type.
      #
      def infer_heading(format, line_def = :access)
        captures = format.line_definitions[line_def].captures
        pairs = captures.collect { |capt| [ capt[:name], to_type(capt[:type]) ] }
        Alf::Heading.new(Hash[pairs])
      end

      # Returns the class for a capture type, String when it is unknown
      def to_type(log_type)
        LOG_TYPES[log_type] || String
      end

      ::Alf::Reader.register(:logs, [".log"], self)
    end # class Reader

  end # module Logs
end # module Alf
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
module Alf
  module Sequel

    #
    # Specialization of Alf::Environment to distribute Sequel datasets
    #
    class Environment < ::Alf::Environment

      #
      # (see Alf::Environment.recognizes?)
      #
      # Returns true if args contains one String that can be interpreted as
      # a valid database uri, i.e. a string that either parses to a URI
      # having a scheme or looks like a sqlite file. Strings that cannot be
      # parsed as a URI are not recognized.
      #
      def self.recognizes?(args)
        require 'uri'
        return false unless (args.size == 1) && args.first.is_a?(String)
        uri = URI::parse(args.first)
        if uri.scheme || looks_a_sqlite_file?(args.first)
          true
        else
          false
        end
      rescue ::URI::Error
        false
      end

      #
      # Returns true if f is an existing file whose extension is ".db"
      #
      def self.looks_a_sqlite_file?(f)
        (File.file?(f) && File.extname(f).==(".db"))
      end

      #
      # Creates an Environment instance
      #
      # @param [String] uri a database uri, or the path to a sqlite file (in
      #        which case a sqlite:// uri is built from it)
      # @param [Hash] options options handed over to Sequel.connect
      #
      def initialize(uri, options = {})
        @uri = self.class.looks_a_sqlite_file?(uri) ? "sqlite://#{uri}" : uri
        @options = options
      end

      #
      # (see Alf::Environment#dataset)
      #
      def dataset(name)
        Iterator.new(connect[name])
      end

      private

      # Creates a database connection, memoized after the first call. The
      # sequel library is only loaded at that time.
      def connect
        Alf::Tools::friendly_require('sequel')
        @db ||= ::Sequel.connect(@uri, @options)
      end

      ::Alf::Environment.register(:sequel, self)
    end # class Environment

    # Specialization of Alf::Iterator to work on a Sequel dataset
    class Iterator
      include ::Alf::Iterator

      # Creates an iterator on top of `dataset`
      def initialize(dataset)
        @dataset = dataset
      end

      #
      # (see Alf::Iterator#each)
      #
      # The block is captured explicitly and forwarded to the underlying
      # dataset. Capturing it through a block-less Proc.new raises an
      # ArgumentError as of Ruby 3.0.
      #
      def each(&block)
        @dataset.each(&block)
      end

      #
      # (see Alf::Iterator#pipe)
      #
      # Both arguments are ignored, as the dataset is wired at construction.
      #
      def pipe(input, env = nil)
        self
      end

    end # class Iterator

  end # module Sequel
end # module Alf
|
|
File without changes
|
data/lib/alf/extra.rb
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
module Alf
  module Iterator
    module Base

      #
      # Wires the iterator input and an optional execution environment.
      #
      # Iterators (typically Reader and Operator instances) work from input
      # data that comes from files, other operators, and so on. This method
      # wires that input data to the iterator. Wiring is required before any
      # attempt to call each, unless autowiring occurs at construction. The
      # exact kind of input object is left at the discretion of Iterator
      # implementations.
      #
      # The stub below is undefined right after being declared: it only
      # anchors this documentation and has to be provided by implementations.
      #
      # @param [Object] input the iterator input, at discretion of the Iterator
      #        implementation.
      # @param [Environment] environment an optional environment for resolving
      #        named datasets if needed.
      # @return [Object] self
      #
      def pipe(input, environment = nil)
        self
      end
      undef_method :pipe

      #
      # Converts this iterator to an in-memory Relation, by coercion.
      #
      # @return [Relation] a relation instance, as the set of tuples
      #         that would be yielded by this iterator.
      #
      def to_rel
        Relation.coerce(self)
      end

    end # module Base
    include(Base)
  end # module Iterator
end # module Alf
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
module Alf
  module Iterator
    module ClassMethods

      #
      # Coerces something to an iterator.
      #
      # Iterators and arrays are returned unchanged. A String or a Symbol is
      # seen as a dataset name and leads to a Proxy on the environment. Any
      # other argument is handed over to Reader.coerce.
      #
      # @param [Object] arg the value to coerce
      # @param [Environment] environment an optional environment, forwarded to
      #        the Proxy or to Reader.coerce
      #
      def coerce(arg, environment = nil)
        case arg
        when Iterator, Array then arg
        when String, Symbol  then Proxy.new(environment, arg.to_sym)
        else                      Reader.coerce(arg, environment)
        end
      end

    end # module ClassMethods
    extend(ClassMethods)
  end # module Iterator
end # module Alf
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
module Alf
  module Iterator
    #
    # Iterator that defers to an environment: the named dataset is only
    # requested when each is called.
    #
    class Proxy
      include Iterator

      # @return [Symbol] name of the dataset to request to environment
      attr_reader :dataset

      #
      # Creates a proxy instance.
      #
      # @param [Environment] env the environment serving iterator instances
      # @param [Symbol] dataset named dataset to rely on
      #
      def initialize(env, dataset)
        @environment = env
        @dataset     = dataset
      end

      #
      # (see Iterator#pipe)
      #
      # Values given at construction win: the environment and the dataset
      # name are only taken from the arguments when not set yet.
      #
      def pipe(input, environment = nil)
        @environment = @environment || environment
        @dataset     = @dataset || input
        self
      end

      #
      # (see Iterator#each)
      #
      # Requests the dataset to the environment, then iterates it with the
      # given block.
      #
      def each(&block)
        source = @environment.dataset(@dataset)
        source.each(&block)
      end

    end # class Proxy
  end # module Iterator
end # module Alf
|