opener-tokenizer-base 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +148 -0
  3. data/bin/tokenizer-base +5 -0
  4. data/bin/tokenizer-de +5 -0
  5. data/bin/tokenizer-en +5 -0
  6. data/bin/tokenizer-es +5 -0
  7. data/bin/tokenizer-fr +5 -0
  8. data/bin/tokenizer-it +5 -0
  9. data/bin/tokenizer-nl +5 -0
  10. data/core/lib/Data/OptList.pm +256 -0
  11. data/core/lib/Params/Util.pm +866 -0
  12. data/core/lib/Sub/Exporter.pm +1101 -0
  13. data/core/lib/Sub/Exporter/Cookbook.pod +309 -0
  14. data/core/lib/Sub/Exporter/Tutorial.pod +280 -0
  15. data/core/lib/Sub/Exporter/Util.pm +354 -0
  16. data/core/lib/Sub/Install.pm +329 -0
  17. data/core/lib/Time/Stamp.pm +808 -0
  18. data/core/load-prefixes.pl +43 -0
  19. data/core/nonbreaking_prefixes/abbreviation_list.kaf +0 -0
  20. data/core/nonbreaking_prefixes/abbreviation_list.txt +444 -0
  21. data/core/nonbreaking_prefixes/nonbreaking_prefix.ca +533 -0
  22. data/core/nonbreaking_prefixes/nonbreaking_prefix.de +781 -0
  23. data/core/nonbreaking_prefixes/nonbreaking_prefix.el +448 -0
  24. data/core/nonbreaking_prefixes/nonbreaking_prefix.en +564 -0
  25. data/core/nonbreaking_prefixes/nonbreaking_prefix.es +758 -0
  26. data/core/nonbreaking_prefixes/nonbreaking_prefix.fr +1027 -0
  27. data/core/nonbreaking_prefixes/nonbreaking_prefix.is +697 -0
  28. data/core/nonbreaking_prefixes/nonbreaking_prefix.it +641 -0
  29. data/core/nonbreaking_prefixes/nonbreaking_prefix.nl +739 -0
  30. data/core/nonbreaking_prefixes/nonbreaking_prefix.pl +729 -0
  31. data/core/nonbreaking_prefixes/nonbreaking_prefix.pt +656 -0
  32. data/core/nonbreaking_prefixes/nonbreaking_prefix.ro +484 -0
  33. data/core/nonbreaking_prefixes/nonbreaking_prefix.ru +705 -0
  34. data/core/nonbreaking_prefixes/nonbreaking_prefix.sk +920 -0
  35. data/core/nonbreaking_prefixes/nonbreaking_prefix.sl +524 -0
  36. data/core/nonbreaking_prefixes/nonbreaking_prefix.sv +492 -0
  37. data/core/split-sentences.pl +114 -0
  38. data/core/text-fixer.pl +169 -0
  39. data/core/tokenizer-cli.pl +363 -0
  40. data/core/tokenizer.pl +145 -0
  41. data/lib/opener/tokenizers/base.rb +84 -0
  42. data/lib/opener/tokenizers/base/version.rb +8 -0
  43. data/opener-tokenizer-base.gemspec +25 -0
  44. metadata +134 -0
@@ -0,0 +1,1101 @@
1
+ use 5.006;
2
+ use strict;
3
+ use warnings;
4
+ package Sub::Exporter;
5
+ {
6
+ $Sub::Exporter::VERSION = '0.984';
7
+ }
8
+ # ABSTRACT: a sophisticated exporter for custom-built routines
9
+
10
+ use Carp ();
11
+ use Data::OptList 0.100 ();
12
+ use Params::Util 0.14 (); # _CODELIKE
13
+ use Sub::Install 0.92 ();
14
+
15
+
# Return the group name carried by an import name, or nothing when the name
# has no group sigil.  A leading "-" or ":" marks a group, so "-all" and
# ":all" both yield "all"; a plain name yields an empty list (undef in scalar
# context).
sub _group_name {
  my ($import_name) = @_;

  my $sigil = substr $import_name, 0, 1;
  return unless index(q{-:}, $sigil) >= 0;

  return substr $import_name, 1;
}
24
+
# Expand every group in a canonicalized opt list into the exports it stands
# for, and return a reference to the resulting canonicalized opt list.
#
#   \@groups - canonicalized opt list of exports and groups
#   \%seen   - names of groups already expanded, which can be ignored
#   \%merge  - options merged in from the group being descended through;
#              -prefix and -suffix in it rename the plain exports found here
sub _expand_groups {
  my ($class, $config, $groups, $collection, $seen, $merge) = @_;
  $seen  = {} unless $seen;
  $merge = {} unless $merge;

  my @expanded = @$groups;

  # Walk from the end so that splicing a group's exports into the list never
  # moves the positions that are still to be visited.
  ENTRY: for my $idx (reverse 0 .. $#expanded) {
    my $entry = $expanded[$idx];

    if (my $group_name = _group_name($entry->[0])) {
      my $scoped_seen = { %$seen }; # faux-dynamic scoping

      splice @expanded, $idx, 1, _expand_group(
        $class, $config, $entry, $collection, $scoped_seen, $merge,
      );
      next ENTRY;
    }

    # a plain export: there's nothing to munge unless options were merged in
    my %inherited = %$merge;
    next ENTRY unless %inherited;

    my $prefix = (delete $inherited{-prefix}) || '';
    my $suffix = (delete $inherited{-suffix}) || '';

    if (
      Params::Util::_CODELIKE($entry->[1]) ## no critic Private
      or
      Params::Util::_SCALAR0($entry->[1]) ## no critic Private
    ) {
      # this entry was built by a group generator, so only its name changes
      $entry->[0] = $prefix . $entry->[0] . $suffix;
      next ENTRY;
    }

    # A reference in -as is kept as given; a plain -as, or failing that the
    # export's own name, is wrapped in the prefix and suffix.
    my $as;
    if (ref $entry->[1]{-as}) {
      $as = $entry->[1]{-as};
    } elsif ($entry->[1]{-as}) {
      $as = $prefix . $entry->[1]{-as} . $suffix;
    } else {
      $as = $prefix . $entry->[0] . $suffix;
    }

    $entry->[1] = { %{ $entry->[1] }, %inherited, -as => $as };
  }

  return \@expanded;
}
69
+
# Expand a single group into the list of opt list entries it stands for.
#
#   \@group - a name/value pair from an opt list: the sigil-prefixed group
#             name and the (optional) arguments given to the group
#   \%seen  - names of groups already expanded; a group found here expands
#             to an empty list
#   \%merge - options inherited from the enclosing groups
#
# Croaks if the group is not configured for $class, or if a group generator
# returns something other than a hashref.
sub _expand_group {
  my ($class, $config, $group, $collection, $seen, $merge) = @_;
  $merge ||= {};

  my ($group_name, $group_arg) = @$group;
  $group_name = _group_name($group_name);

  Carp::croak qq(group "$group_name" is not exported by the $class module)
    unless exists $config->{groups}{$group_name};

  return if $seen->{$group_name}++;

  if (ref $group_arg) {
    # Combine on a private copy.  _expand_groups hands this same \%merge to
    # every sibling entry of the enclosing group, so deleting -prefix and
    # -suffix from it directly would strip them from the siblings that are
    # processed afterwards.
    my %inherited    = %$merge;
    my $outer_prefix = delete $inherited{-prefix};
    my $outer_suffix = delete $inherited{-suffix};

    # outer prefixes go in front of inner ones; outer suffixes go behind
    my $prefix = ($outer_prefix || '') . ($group_arg->{-prefix} || '');
    my $suffix = ($group_arg->{-suffix} || '') . ($outer_suffix || '');

    $merge = {
      %inherited,
      %$group_arg,
      ($prefix ? (-prefix => $prefix) : ()),
      ($suffix ? (-suffix => $suffix) : ()),
    };
  }

  my $exports = $config->{groups}{$group_name};

  if (
    Params::Util::_CODELIKE($exports) ## no critic Private
    or
    Params::Util::_SCALAR0($exports) ## no critic Private
  ) {
    # I'm not very happy with this code for hiding -prefix and -suffix, but
    # it's needed, and I'm not sure, offhand, how to make it better.
    # -- rjbs, 2006-12-05
    my $generator_arg = { %$merge };
    delete $generator_arg->{-prefix};
    delete $generator_arg->{-suffix};

    # a code-like generator is called with the class as its first argument;
    # a scalar ref names a method to call on the class
    my $generated = Params::Util::_CODELIKE($exports) ## no critic Private
      ? $exports->($class, $group_name, $generator_arg, $collection)
      : $class->$$exports($group_name, $generator_arg, $collection);

    Carp::croak qq(group generator "$group_name" did not return a hashref)
      if ref $generated ne 'HASH';

    my $stuff = [ map { [ $_ => $generated->{$_} ] } keys %$generated ];
    return @{
      _expand_groups($class, $config, $stuff, $collection, $seen, $merge)
    };
  }

  $exports = Data::OptList::mkopt($exports, "$group_name exports");

  return @{
    _expand_groups($class, $config, $exports, $collection, $seen, $merge)
  };
}
128
+
# Build the closure that records the collections of one import call.
#
#   \%col - hash that receives each collection's value, keyed by its name
#   \@etc - [ \%config, \@import_args, $class, $into ], passed on to hooks
#
# The returned code ref takes one [ name => value ] pair.  It croaks when a
# name is supplied twice or when the collector's hook returns false;
# otherwise it stores the value in \%col.
sub _mk_collection_builder {
  my ($col, $etc) = @_;
  my ($config, $import_args, $class, $into) = @$etc;

  my %already_collected;

  return sub {
    my ($pair) = @_;
    my ($name, $value) = @$pair;

    if ($already_collected{$name}++) {
      Carp::croak "collection $name provided multiple times in import";
    }

    my $hook = $config->{collectors}{$name};
    if (ref $hook) {
      my $hook_arg = {
        name        => $name,
        config      => $config,
        import_args => $import_args,
        class       => $class,
        into        => $into,
      };

      # a scalar ref names a method to call on the class; anything else is
      # called as code
      my $valid = Params::Util::_SCALAR0($hook) ## no critic Private
        ? $class->$$hook($value, $hook_arg)
        : $hook->($value, $hook_arg);

      Carp::croak "collection $name failed validation" unless $valid;
    }

    $col->{$name} = $value;
  };
}
161
+
# Given a config and pre-canonicalized importer args, remove the collections
# from the args (in place) and return them as a hashref of name => value.
# When the config has an INIT collector, an empty INIT collection is handled
# before all the others.
sub _collect_collections {
  my ($config, $import_args, $class, $into) = @_;

  # Pull collector entries out back to front, so that removing one never
  # disturbs the indices still to be checked.
  my @collections;
  for my $idx (reverse 0 .. $#$import_args) {
    next unless exists $config->{collectors}{ $import_args->[$idx][0] };
    push @collections, splice(@$import_args, $idx, 1);
  }

  unshift @collections, [ INIT => {} ] if $config->{collectors}{INIT};

  my %col;
  my $builder = _mk_collection_builder(\%col, \@_);
  for my $pair (@collections) {
    $builder->($pair);
  }

  return \%col;
}
182
+
183
+
# Build an exporter from \%config and install it.  The "as" key names the
# installed routine (default "import"); "into" or "into_level" picks the
# package that receives it (default: the caller).  Those keys are removed
# from the config before it is handed to build_exporter.  Croaks if both
# into and into_level are supplied.
sub setup_exporter {
  my ($config) = @_;

  if (exists $config->{into} and exists $config->{into_level}) {
    Carp::croak 'into and into_level may not both be supplied to exporter';
  }

  my $as = (delete $config->{as}) || 'import';

  # caller() is invoked right here, in this sub's own frame, so the levels
  # are counted from setup_exporter itself.
  my $into;
  if (exists $config->{into}) {
    $into = delete $config->{into};
  } elsif (exists $config->{into_level}) {
    $into = caller(delete $config->{into_level});
  } else {
    $into = caller(0);
  }

  my $exporter = build_exporter($config);

  return Sub::Install::reinstall_sub({
    code => $exporter,
    into => $into,
    as   => $as,
  });
}
204
+
205
+
# Return the keys that the two given hashrefs have in common: the list of
# names in list context, their count in scalar context.
sub _key_intersection {
  my ($x, $y) = @_;

  my %in_x;
  @in_x{ keys %$x } = ();

  return grep { exists $in_x{$_} } keys %$y;
}
211
+
# Given the config passed to setup_exporter, which contains sugary opt list
# data, _rewrite_build_config rewrites the opt lists into hashes, catches a
# few kinds of invalid configurations, and sets up defaults.  Since the config
# is a reference, it's rewritten in place.
#
# %valid_config_key holds every key such a config may contain; it is filled
# in at compile time.
my %valid_config_key;
BEGIN {
  my @current    = qw(as collectors installer generator exports groups into into_level);
  my @deprecated = qw(exporter);

  @valid_config_key{ @current, @deprecated } = (1) x (@current + @deprecated);
}
223
+
# Croak if \%collectors has a key made up only of capital letters and
# underscores other than "INIT"; such names are reserved.
sub _assert_collector_names_ok {
  my ($collectors) = @_;

  for my $name (keys %$collectors) {
    next unless $name =~ /\A[_A-Z]+\z/;
    next if $name eq 'INIT';

    Carp::croak "unknown reserved collector name: $name";
  }
}
232
+
# Validate an exporter configuration and rewrite it, in place, into the
# canonical form used by the rest of the module:
#
#   * croaks on unknown keys, and when both into and into_level are given
#   * moves the deprecated "exporter" key to "installer" (with a warning)
#   * turns the exports, collectors, and groups opt lists into hashes
#   * croaks on unknown reserved collector names and on names used both as
#     an export and as a collector
#   * supplies the "default" and "all" groups and the default generator and
#     installer when they are not configured
sub _rewrite_build_config {
  my ($config) = @_;

  if (my @keys = grep { not exists $valid_config_key{$_} } keys %$config) {
    Carp::croak "unknown options (@keys) passed to Sub::Exporter";
  }

  # Nothing below touches into or into_level, so checking once is enough.
  Carp::croak q(into and into_level may not both be supplied to exporter)
    if exists $config->{into} and exists $config->{into_level};

  # XXX: Remove after deprecation period.
  if ($config->{exporter}) {
    Carp::cluck "'exporter' argument to build_exporter is deprecated. Use 'installer' instead; the semantics are identical.";
    $config->{installer} = delete $config->{exporter};
  }

  for my $key (qw(exports collectors)) {
    $config->{$key} = Data::OptList::mkopt_hash(
      $config->{$key},
      $key,
      [ 'CODE', 'SCALAR' ],
    );
  }

  _assert_collector_names_ok($config->{collectors});

  if (my @names = _key_intersection(@$config{qw(exports collectors)})) {
    Carp::croak "names (@names) used in both collections and exports";
  }

  $config->{groups} = Data::OptList::mkopt_hash(
    $config->{groups},
    'groups',
    [
      'HASH',   # standard opt list
      'ARRAY',  # standard opt list
      'CODE',   # group generator
      'SCALAR', # name of group generation method
    ]
  );

  # by default, export nothing
  $config->{groups}{default} ||= [];

  # by default, build an all-inclusive 'all' group
  $config->{groups}{all} ||= [ keys %{ $config->{exports} } ];

  $config->{generator} ||= \&default_generator;
  $config->{installer} ||= \&default_installer;
}
286
+
# Given an exporter configuration (which is validated and rewritten in
# place), return a code ref suitable for use as a package's import routine.
#
# The returned routine is called as a class method.  Its first argument may
# be a hashref of special options (into, into_level, generator, installer)
# that override the configuration for that one call; the rest are the
# import arguments.
sub build_exporter {
  my ($config) = @_;

  _rewrite_build_config($config);

  return sub {
    my $class = shift;

    # XXX: clean this up -- rjbs, 2006-03-16
    my $special = (ref $_[0]) ? shift(@_) : {};

    if (exists $special->{into} and exists $special->{into_level}) {
      Carp::croak q(into and into_level may not both be supplied to exporter);
    }

    if ($special->{exporter}) {
      Carp::cluck "'exporter' special import argument is deprecated. Use 'installer' instead; the semantics are identical.";
      $special->{installer} = delete $special->{exporter};
    }

    # Per-call options win over the configuration.  caller() is invoked
    # right here, in this closure's own frame, so levels count from it.
    my $into;
    if (defined $special->{into}) {
      $into = delete $special->{into};
    } elsif (defined $special->{into_level}) {
      $into = caller(delete $special->{into_level});
    } elsif (defined $config->{into}) {
      $into = $config->{into};
    } elsif (defined $config->{into_level}) {
      $into = caller($config->{into_level});
    } else {
      $into = caller(0);
    }

    my $generator = (delete $special->{generator}) || $config->{generator};
    my $installer = (delete $special->{installer}) || $config->{installer};

    # this builds an AOA, where the inner arrays are [ name => value_ref ]
    my $import_args = Data::OptList::mkopt([ @_ ]);

    # is this right?  defaults first or collectors first? -- rjbs, 2006-06-24
    $import_args = [ [ -default => undef ] ] unless @$import_args;

    # collections are removed from $import_args; the groups among what is
    # left are then expanded into the final "to do" list
    my $collection = _collect_collections($config, $import_args, $class, $into);
    my $to_import  = _expand_groups($class, $config, $import_args, $collection);

    _do_import(
      {
        class     => $class,
        col       => $collection,
        config    => $config,
        into      => $into,
        generator => $generator,
        installer => $installer,
      },
      $to_import,
    );
  };
}
341
+
# Generate the code for every entry of the expanded import list and pass the
# results to the installer.
#
#   \%arg       - class, col, config, into, generator, and installer
#   \@to_import - expanded opt list of [ name => argument ] pairs
#
# Croaks when a named entry is not among the configured exports.  The
# installer receives a flat list of (as, code) pairs.
sub _do_import {
  my ($arg, $to_import) = @_;

  my @install_queue;

  for my $entry (@$to_import) {
    my ($name, $import_arg) = @$entry;

    my ($generator, $as);

    if ($import_arg and Params::Util::_CODELIKE($import_arg)) { ## no critic
      # This is the case when a group generator has inserted name/code pairs.
      ($generator, $as) = (sub { $import_arg }, $name);
    } else {
      # work on a copy, so that removing -as leaves the original alone
      my %named_arg = $import_arg ? %$import_arg : ();

      Carp::croak qq("$name" is not exported by the $arg->{class} module)
        unless exists $arg->{config}{exports}{$name};

      $generator = $arg->{config}{exports}{$name};

      $as = exists $named_arg{-as} ? (delete $named_arg{-as}) : $name;

      $import_arg = \%named_arg;
    }

    my $code = $arg->{generator}->(
      {
        class     => $arg->{class},
        name      => $name,
        arg       => $import_arg,
        col       => $arg->{col},
        generator => $generator,
      }
    );

    push @install_queue, $as, $code;
  }

  $arg->{installer}->(
    {
      class => $arg->{class},
      into  => $arg->{into},
      col   => $arg->{col},
    },
    \@install_queue,
  );
}
389
+
390
+ ## Cute idea, possibly for future use: also supply an "unimport" for:
391
+ ## no Module::Whatever qw(arg arg arg);
392
+ # sub _unexport {
393
+ # my (undef, undef, undef, undef, undef, $as, $into) = @_;
394
+ #
395
+ # if (ref $as eq 'SCALAR') {
396
+ # undef $$as;
397
+ # } elsif (ref $as) {
398
+ # Carp::croak "invalid reference type for $as: " . ref $as;
399
+ # } else {
400
+ # no strict 'refs';
401
+ # delete &{$into . '::' . $as};
402
+ # }
403
+ # }
404
+
405
+
# The default generator callback.  \%arg supplies class, name, generator,
# arg, and col.
#
#   * no generator: return the routine the class can() under that name, or
#     croak if there is none
#   * code-like generator: call it with the class, name, arg, and col
#   * anything else: treat it as a reference to a method name and call that
#     method on the class with the name, arg, and col
sub default_generator {
  my ($arg) = @_;
  my ($class, $name, $generator) = @$arg{qw(class name generator)};

  unless (defined $generator) {
    my $code = $class->can($name);
    Carp::croak "can't locate exported subroutine $name via $class"
      unless $code;

    return $code;
  }

  # I considered making this "$class->$generator(" but it seems that
  # overloading precedence would turn an overloaded-as-code generator object
  # into a string before code. -- rjbs, 2006-06-11
  if (Params::Util::_CODELIKE($generator)) { ## no critic Private
    return $generator->($class, $name, $arg->{arg}, $arg->{col});
  }

  # This "must" be a scalar reference, to a generator method name.
  # -- rjbs, 2006-12-05
  return $class->$$generator($name, $arg->{arg}, $arg->{col});
}
426
+
427
+
# The default installer callback.  \@to_export is a flat list of (as, code)
# pairs.  A plain "as" is a name under which the code is installed into
# $arg->{into}; a scalar reference has the code assigned to the scalar it
# refers to; any other kind of reference is an error.
sub default_installer {
  my ($arg, $to_export) = @_;

  my @pending = @$to_export;

  while (my ($as, $code) = splice @pending, 0, 2) {
    # Allow as isa ARRAY to push onto an array?
    # Allow into isa HASH to install name=>code into hash?

    if (not ref $as) {
      Sub::Install::reinstall_sub({
        code => $code,
        into => $arg->{into},
        as   => $as
      });
    } elsif (ref $as eq 'SCALAR') {
      $$as = $code;
    } else {
      Carp::croak "invalid reference type for $as: " . ref $as;
    }
  }
}
450
+
# Deprecated name for default_installer: warns with a stack trace, then
# transfers control to default_installer with the argument list unchanged.
sub default_exporter {
  my $notice = "default_exporter is deprecated; call default_installer instead; the semantics are identical";
  Carp::cluck $notice;

  goto &default_installer;
}
455
+
456
+
# Sub::Exporter sets up its own exporter.  The -setup collector lets a
# module write "use Sub::Exporter -setup => ..." and have an import routine
# built by the _import generator and installed as "import".
setup_exporter({
  exports => [
    qw(setup_exporter build_exporter),
    _import => sub { build_exporter($_[2]) },
  ],
  groups  => {
    # every name in a group has to be one of the exports above, or importing
    # the group croaks
    all   => [ qw(setup_exporter build_exporter) ],
  },
  collectors => { -setup => \&_setup },
});
467
+
# Hook for the -setup collector.  A hashref value is used as an exporter
# configuration; an arrayref value is used as that configuration's exports
# list.  Either way a request for the _import export, to be installed as
# "import" unless the hashref carries its own -as, is appended to the import
# args and 1 is returned.  Any other value fails validation (empty return).
sub _setup {
  my ($value, $arg) = @_;

  my $kind = ref $value;

  my @exporter_config;
  if ($kind eq 'HASH') {
    @exporter_config = %$value;
  } elsif ($kind eq 'ARRAY') {
    @exporter_config = (exports => $value);
  } else {
    return;
  }

  push @{ $arg->{import_args} },
    [ _import => { -as => 'import', @exporter_config } ];

  return 1;
}
481
+
482
+
483
+
484
+ "jn8:32"; # <-- magic true value
485
+
486
+ __END__
487
+ =pod
488
+
489
+ =head1 NAME
490
+
491
+ Sub::Exporter - a sophisticated exporter for custom-built routines
492
+
493
+ =head1 VERSION
494
+
495
+ version 0.984
496
+
497
+ =head1 SYNOPSIS
498
+
499
+ Sub::Exporter must be used in two places. First, in an exporting module:
500
+
501
+ # in the exporting module:
502
+ package Text::Tweaker;
503
+ use Sub::Exporter -setup => {
504
+ exports => [
505
+ qw(squish titlecase), # always works the same way
506
+ reformat => \&build_reformatter, # generator to build exported function
507
+ trim => \&build_trimmer,
508
+ indent => \&build_indenter,
509
+ ],
510
+ collectors => [ 'defaults' ],
511
+ };
512
+
513
+ Then, in an importing module:
514
+
515
+ # in the importing module:
516
+ use Text::Tweaker
517
+ 'squish',
518
+ indent => { margin => 5 },
519
+ reformat => { width => 79, justify => 'full', -as => 'prettify_text' },
520
+ defaults => { eol => 'CRLF' };
521
+
522
+ With this setup, the importing module ends up with three routines: C<squish>,
523
+ C<indent>, and C<prettify_text>. The latter two have been built to the
524
+ specifications of the importer -- they are not just copies of the code in the
525
+ exporting package.
526
+
527
+ =head1 DESCRIPTION
528
+
529
+ B<ACHTUNG!> If you're not familiar with Exporter or exporting, read
530
+ L<Sub::Exporter::Tutorial> first!
531
+
532
+ =head2 Why Generators?
533
+
534
+ The biggest benefit of Sub::Exporter over existing exporters (including the
535
+ ubiquitous Exporter.pm) is its ability to build new coderefs for export, rather
536
+ than to simply export code identical to that found in the exporting package.
537
+
538
+ If your module's consumers get a routine that works like this:
539
+
540
+ use Data::Analyze qw(analyze);
541
+ my $value = analyze($data, $tolerance, $passes);
542
+
543
+ and they constantly pass only one or two different sets of values for the
544
+ non-C<$data> arguments, your code can benefit from Sub::Exporter. By writing a
545
+ simple generator, you can let them do this, instead:
546
+
547
+ use Data::Analyze
548
+ analyze => { tolerance => 0.10, passes => 10, -as => 'analyze10' },
549
+ analyze => { tolerance => 0.15, passes => 50, -as => 'analyze50' };
550
+
551
+ my $value = analyze10($data);
552
+
553
+ The generator for that would look something like this:
554
+
555
+ sub build_analyzer {
556
+ my ($class, $name, $arg) = @_;
557
+
558
+ return sub {
559
+ my $data = shift;
560
+ my $tolerance = shift || $arg->{tolerance};
561
+ my $passes = shift || $arg->{passes};
562
+
563
+ analyze($data, $tolerance, $passes);
564
+ }
565
+ }
566
+
567
+ Your module's user now has to do less work to benefit from it -- and remember,
568
+ you're often your own user! Investing in customized subroutines is an
569
+ investment in future laziness.
570
+
571
+ This also avoids a common form of ugliness seen in many modules: package-level
572
+ configuration. That is, you might have seen something like the above
573
+ implemented like so:
574
+
575
+ use Data::Analyze qw(analyze);
576
+ $Data::Analyze::default_tolerance = 0.10;
577
+ $Data::Analyze::default_passes = 10;
578
+
579
+ This might save time, until you have multiple modules using Data::Analyze.
580
+ Because there is only one global configuration, they step on each other's toes
581
+ and your code begins to have mysterious errors.
582
+
583
+ Generators can also allow you to export class methods to be called as
584
+ subroutines:
585
+
586
+ package Data::Methodical;
587
+ use Sub::Exporter -setup => { exports => { some_method => \&_curry_class } };
588
+
589
+ sub _curry_class {
590
+ my ($class, $name) = @_;
591
+ sub { $class->$name(@_); };
592
+ }
593
+
594
+ Because of the way that exporters and Sub::Exporter work, any package that
595
+ inherits from Data::Methodical can inherit its exporter and override its
596
+ C<some_method>. If a user imports C<some_method> from that package, he'll
597
+ receive a subroutine that calls the method on the subclass, rather than on
598
+ Data::Methodical itself.
599
+
600
+ =head2 Other Customizations
601
+
602
+ Building custom routines with generators isn't the only way that Sub::Exporter
603
+ allows the importing code to refine its use of the exported routines. They may
604
+ also be renamed to avoid naming collisions.
605
+
606
+ Consider the following code:
607
+
608
+ # this program determines to which circle of Hell you will be condemned
609
+ use Morality qw(sin virtue); # for calculating viciousness
610
+ use Math::Trig qw(:all); # for dealing with circles
611
+
612
+ The programmer has inadvertently imported two C<sin> routines. The solution,
613
+ in Exporter.pm-based modules, would be to import only one and then call the
614
+ other by its fully-qualified name. Alternately, the importer could write a
615
+ routine that did so, or could mess about with typeglobs.
616
+
617
+ How much easier to write:
618
+
619
+ # this program determines to which circle of Hell you will be condemned
620
+ use Morality qw(virtue), sin => { -as => 'offense' };
621
+ use Math::Trig -all => { -prefix => 'trig_' };
622
+
623
+ and to have at one's disposal C<offense> and C<trig_sin> -- not to mention
624
+ C<trig_cos> and C<trig_tan>.
625
+
626
+ =head1 EXPORTER CONFIGURATION
627
+
628
+ You can configure an exporter for your package by using Sub::Exporter like so:
629
+
630
+ package Tools;
631
+ use Sub::Exporter
632
+ -setup => { exports => [ qw(function1 function2 function3) ] };
633
+
634
+ This is the simplest way to use the exporter, and is basically equivalent to
635
+ this:
636
+
637
+ package Tools;
638
+ use base qw(Exporter);
639
+ our @EXPORT_OK = qw(function1 function2 function3);
640
+
641
+ Any basic use of Sub::Exporter will look like this:
642
+
643
+ package Tools;
644
+ use Sub::Exporter -setup => \%config;
645
+
646
+ The following keys are valid in C<%config>:
647
+
648
+ exports - a list of routines to provide for exporting; each routine may be
649
+ followed by a generator
650
+ groups - a list of groups to provide for exporting; each must be followed by
651
+ either (a) a list of exports, possibly with arguments for each
652
+ export, or (b) a generator
653
+
654
+ collectors - a list of names into which values are collected for use in
655
+ routine generation; each name may be followed by a validator
656
+
657
+ In addition to the basic options above, a few more advanced options may be
658
+ passed:
659
+
660
+ into_level - how far up the caller stack to look for a target (default 0)
661
+ into - an explicit target (package) into which to export routines
662
+
663
+ In other words: Sub::Exporter installs an C<import> routine which, when called,
664
+ exports routines to the calling namespace. The C<into> and C<into_level>
665
+ options change where those exported routines are installed.
666
+
667
+ generator - a callback used to produce the code that will be installed
668
+ default: Sub::Exporter::default_generator
669
+
670
+ installer - a callback used to install the code produced by the generator
671
+ default: Sub::Exporter::default_installer
672
+
673
+ For information on how these callbacks are used, see the documentation for
674
+ C<L</default_generator>> and C<L</default_installer>>.
675
+
676
+ =head2 Export Configuration
677
+
678
+ The C<exports> list may be provided as an array reference or a hash reference.
679
+ The list is processed in such a way that the following are equivalent:
680
+
681
+ { exports => [ qw(foo bar baz), quux => \&quux_generator ] }
682
+
683
+ { exports =>
684
+ { foo => undef, bar => undef, baz => undef, quux => \&quux_generator } }
685
+
686
+ Generators are code that return coderefs. They are called with four
687
+ parameters:
688
+
689
+ $class - the class whose exporter has been called (the exporting class)
690
+ $name - the name of the export for which the routine is being built
691
+ \%arg - the arguments passed for this export
692
+ \%col - the collections for this import
693
+
694
+ Given the configuration in the L</SYNOPSIS>, the following C<use> statement:
695
+
696
+ use Text::Tweaker
697
+ reformat => { -as => 'make_narrow', width => 33 },
698
+ defaults => { eol => 'CR' };
699
+
700
+ would result in the following call to C<&build_reformatter>:
701
+
702
+ my $code = build_reformatter(
703
+ 'Text::Tweaker',
704
+ 'reformat',
705
+ { width => 33 }, # note that -as is not passed in
706
+ { defaults => { eol => 'CR' } },
707
+ );
708
+
709
+ The returned coderef (C<$code>) would then be installed as C<make_narrow> in the
710
+ calling package.
711
+
712
+ Instead of providing a coderef in the configuration, a reference to a method
713
+ name may be provided. This method will then be called on the invocant of the
714
+ C<import> method. (In this case, we do not pass the C<$class> parameter, as it
715
+ would be redundant.)
716
+
717
+ =head2 Group Configuration
718
+
719
+ The C<groups> list can be passed in the same forms as C<exports>. Groups must
720
+ have values to be meaningful, which may either list exports that make up the
721
+ group (optionally with arguments) or may provide a way to build the group.
722
+
723
+ The simpler case is the first: a group definition is a list of exports. Here's
724
+ an example that could go in the exporter configuration in the L</SYNOPSIS>:
725
+
726
+ groups => {
727
+ default => [ qw(reformat) ],
728
+ shorteners => [ qw(squish trim) ],
729
+ email_safe => [
730
+ 'indent',
731
+ reformat => { -as => 'email_format', width => 72 }
732
+ ],
733
+ },
734
+
735
+ Groups are imported by specifying their name prefixed by either a dash or a
736
+ colon. This line of code would import the C<shorteners> group:
737
+
738
+ use Text::Tweaker qw(-shorteners);
739
+
740
+ Arguments passed to a group when importing are merged into the group's options
741
+ and passed to any relevant generators. Groups can contain other groups, but
742
+ looping group structures are ignored.
743
+
744
+ The other possible value for a group definition, a coderef, allows one
745
+ generator to build several exportable routines simultaneously. This is useful
746
+ when many routines must share enclosed lexical variables. The coderef must
747
+ return a hash reference. The keys will be used as export names and the values
748
+ are the subs that will be exported.
749
+
750
+ This example shows a simple use of the group generator.
751
+
752
+ package Data::Crypto;
753
+ use Sub::Exporter -setup => { groups => { cipher => \&build_cipher_group } };
754
+
755
+ sub build_cipher_group {
756
+ my ($class, $group, $arg) = @_;
757
+ my ($encode, $decode) = build_codec($arg->{secret});
758
+ return { cipher => $encode, decipher => $decode };
759
+ }
760
+
761
+ The C<cipher> and C<decipher> routines are built in a group because they are
762
+ built together by code which encloses their secret in their environment.
763
+
764
+ =head3 Default Groups
765
+
766
+ If a module that uses Sub::Exporter is C<use>d with no arguments, it will try
767
+ to export the group named C<default>. If that group has not been specifically
768
+ configured, it will be empty, and nothing will happen.
769
+
770
+ Another group is also created if not defined: C<all>. The C<all> group
771
+ contains all the exports from the exports list.
772
+
773
+ =head2 Collector Configuration
774
+
775
+ The C<collectors> entry in the exporter configuration gives names which, when
776
+ found in the import call, have their values collected and passed to every
777
+ generator.
778
+
779
+ For example, the C<build_analyzer> generator that we saw above could be
780
+ rewritten as:
781
+
782
+ sub build_analyzer {
783
+ my ($class, $name, $arg, $col) = @_;
784
+
785
+ return sub {
786
+ my $data = shift;
787
+ my $tolerance = shift || $arg->{tolerance} || $col->{defaults}{tolerance};
788
+ my $passes = shift || $arg->{passes} || $col->{defaults}{passes};
789
+
790
+ analyze($data, $tolerance, $passes);
791
+ }
792
+ }
793
+
794
+ That would allow the importer to specify global defaults for its imports:
795
+
796
+ use Data::Analyze
797
+ 'analyze',
798
+ analyze => { tolerance => 0.10, -as => 'analyze10' },
799
+ analyze => { tolerance => 0.15, passes => 50, -as => 'analyze50' },
800
+ defaults => { passes => 10 };
801
+
802
+ my $A = analyze10($data); # equivalent to analyze($data, 0.10, 10);
803
+ my $C = analyze50($data); # equivalent to analyze($data, 0.15, 50);
804
+ my $B = analyze($data, 0.20); # equivalent to analyze($data, 0.20, 10);
805
+
806
+ If values are provided in the C<collectors> list during exporter setup, they
807
+ must be code references, and are used to validate the importer's values. The
808
+ validator is called when the collection is found, and if it returns false, an
809
+ exception is thrown. We could ensure that no one tries to set a global data
810
+ default easily:
811
+
812
+ collectors => { defaults => sub { return (exists $_[0]->{data}) ? 0 : 1 } }
813
+
814
+ Collector coderefs can also be used as hooks to perform arbitrary actions
815
+ before anything is exported.
816
+
817
+ When the coderef is called, it is passed the value of the collection and a
818
+ hashref containing the following entries:
819
+
820
+ name - the name of the collector
821
+ config - the exporter configuration (hashref)
822
+ import_args - the arguments passed to the exporter, sans collections (aref)
823
+ class - the package on which the importer was called
824
+ into - the package into which exports will be exported
825
+
826
+ Collectors with all-caps names (that is, made up of underscore or capital A
827
+ through Z) are reserved for special use. The only currently implemented
828
+ special collector is C<INIT>, whose hook (if present in the exporter
829
+ configuration) is always run before any other hook.
830
+
831
+ =head1 CALLING THE EXPORTER
832
+
833
+ Arguments to the exporter (that is, the arguments after the module name in a
834
+ C<use> statement) are parsed as follows:
835
+
836
+ First, the collectors gather any collections found in the arguments. Any
837
+ reference type may be given as the value for a collector. For each collection
838
+ given in the arguments, its validator (if any) is called.
839
+
840
+ Next, groups are expanded. If the group is implemented by a group generator,
841
+ the generator is called. There are two special arguments which, if given to a
842
+ group, have special meaning:
843
+
844
+ -prefix - a string to prepend to any export imported from this group
845
+ -suffix - a string to append to any export imported from this group
846
+
847
+ Finally, individual export generators are called and all subs, generated or
848
+ otherwise, are installed in the calling package. There is only one special
849
+ argument for export generators:
850
+
851
+ -as - where to install the exported sub
852
+
853
+ Normally, C<-as> will contain an alternate name for the routine. It may,
854
+ however, contain a reference to a scalar. If that is the case, a reference to the
855
+ generated routine will be placed in the scalar referenced by C<-as>. It will
856
+ not be installed into the calling package.
857
+
858
+ =head2 Special Exporter Arguments
859
+
860
+ The generated exporter accepts some special options, which may be passed as the
861
+ first argument, in a hashref.
862
+
863
+ These options are:
864
+
865
+ into_level
866
+ into
867
+ generator
868
+ installer
869
+
870
+ These override the same-named configuration options described in L</EXPORTER
871
+ CONFIGURATION>.
872
+
873
+ =head1 SUBROUTINES
874
+
875
+ =head2 setup_exporter
876
+
877
+ This routine builds and installs an C<import> routine. It is called with one
878
+ argument, a hashref containing the exporter configuration. Using this, it
879
+ builds an exporter and installs it into the calling package with the name
880
+ "import." In addition to the normal exporter configuration, a few named
881
+ arguments may be passed in the hashref:
882
+
883
+ into - into what package should the exporter be installed
884
+ into_level - into what level up the stack should the exporter be installed
885
+ as - what name should the installed exporter be given
886
+
887
+ By default the exporter is installed with the name C<import> into the immediate
888
+ caller of C<setup_exporter>. In other words, if your package calls
889
+ C<setup_exporter> without providing any of the three above arguments, it will
890
+ have an C<import> routine installed.
891
+
892
+ Providing both C<into> and C<into_level> will cause an exception to be thrown.
893
+
894
+ The exporter is built by C<L</build_exporter>>.
895
+
896
+ =head2 build_exporter
897
+
898
+ Given a standard exporter configuration, this routine builds and returns an
899
+ exporter -- that is, a subroutine that can be installed as a class method to
900
+ perform exporting on request.
901
+
902
+ Usually, this method is called by C<L</setup_exporter>>, which then installs
903
+ the exporter as a package's import routine.
904
+
905
+ =head2 default_generator
906
+
907
+ This is Sub::Exporter's default generator. It takes bits of configuration that
908
+ have been gathered during the import and turns them into a coderef that can be
909
+ installed.
910
+
911
+ my $code = default_generator(\%arg);
912
+
913
+ Passed arguments are:
914
+
915
+ class - the class on which the import method was called
916
+ name - the name of the export being generated
917
+ arg - the arguments to the generator
918
+ col - the collections
919
+
920
+ generator - the generator to be used to build the export (code or scalar ref)
921
+
922
+ =head2 default_installer
923
+
924
+ This is Sub::Exporter's default installer. It does what Sub::Exporter
925
+ promises: it installs code into the target package.
926
+
927
+ default_installer(\%arg, \@to_export);
928
+
929
+ Passed arguments are:
930
+
931
+ into - the package into which exports should be delivered
932
+
933
+ C<@to_export> is a list of name/value pairs. The default installer assigns code
934
+ (the values) to named slots (the names) in the given package. If the name is a
935
+ scalar reference, the scalar reference is made to point to the code reference
936
+ instead.
937
+
938
+ =head1 EXPORTS
939
+
940
+ Sub::Exporter also offers its own exports: the C<setup_exporter> and
941
+ C<build_exporter> routines described above. It also provides a special "setup"
942
+ collector, which will set up an exporter using the parameters passed to it.
943
+
944
+ Note that the "setup" collector (seen in examples like the L</SYNOPSIS> above)
945
+ uses C<build_exporter>, not C<setup_exporter>. This means that the special
946
+ arguments like "into" and "as" for C<setup_exporter> are not accepted here.
947
+ Instead, you may write something like:
948
+
949
+ use Sub::Exporter
950
+ { into => 'Target::Package' },
951
+ -setup => {
952
+ -as => 'do_import',
953
+ exports => [ ... ],
954
+ }
955
+ ;
956
+
957
+ Finding a good reason for wanting to do this is left as an exercise for the
958
+ reader.
959
+
960
+ =head1 COMPARISONS
961
+
962
+ There are a whole mess of exporters on the CPAN. The features included in
963
+ Sub::Exporter set it apart from any existing Exporter. Here's a summary of
964
+ some other exporters and how they compare.
965
+
966
+ =over
967
+
968
+ =item * L<Exporter> and co.
969
+
970
+ This is the standard Perl exporter. Its interface is a little clunky, but it's
971
+ fast and ubiquitous. It can do some things that Sub::Exporter can't: it can
972
+ export things other than routines, it can import "everything in this group
973
+ except this symbol," and some other more esoteric things. These features seem
974
+ to go nearly entirely unused.
975
+
976
+ It always exports things exactly as they appear in the exporting module; it
977
+ can't rename or customize routines. Its groups ("tags") can't be nested.
978
+
979
+ L<Exporter::Lite> is a whole lot like Exporter, but it does significantly less:
980
+ it supports exporting symbols, but not groups, pattern matching, or negation.
981
+
982
+ The fact that Sub::Exporter can't export symbols other than subroutines is
983
+ a good idea, not a missing feature.
984
+
985
+ For simple uses, setting up Sub::Exporter is about as easy as Exporter. For
986
+ complex uses, Sub::Exporter makes hard things possible, which would not be
987
+ possible with Exporter.
988
+
989
+ When using a module that uses Sub::Exporter, users familiar with Exporter will
990
+ probably see no difference in the basics. These two lines do about the same
991
+ thing whether the exporting module uses Exporter or Sub::Exporter.
992
+
993
+ use Some::Module qw(foo bar baz);
994
+ use Some::Module qw(foo :bar baz);
995
+
996
+ The definition for exporting in Exporter.pm might look like this:
997
+
998
+ package Some::Module;
999
+ use base qw(Exporter);
1000
+ our @EXPORT_OK = qw(foo bar baz quux);
1001
+ our %EXPORT_TAGS = (bar => [ qw(bar baz) ]);
1002
+
1003
+ Using Sub::Exporter, it would look like this:
1004
+
1005
+ package Some::Module;
1006
+ use Sub::Exporter -setup => {
1007
+ exports => [ qw(foo bar baz quux) ],
1008
+ groups => { bar => [ qw(bar baz) ]}
1009
+ };
1010
+
1011
+ Sub::Exporter respects inheritance, so that a package may export inherited
1012
+ routines, and will export the most inherited version. Exporting methods
1013
+ without currying away the invocant is a bad idea, but Sub::Exporter allows you
1014
+ to do just that -- and anyway, there are other uses for this feature, like
1015
+ packages of exported subroutines which use inheritance specifically to allow
1016
+ more specialized, but similar, packages.
1017
+
1018
+ L<Exporter::Easy> provides a wrapper around the standard Exporter. It makes it
1019
+ simpler to build groups, but doesn't provide any more functionality. Because
1020
+ it is a front-end to Exporter, it will store your exporter's configuration in
1021
+ global package variables.
1022
+
1023
+ =item * Attribute-Based Exporters
1024
+
1025
+ Some exporters use attributes to mark variables to export. L<Exporter::Simple>
1026
+ supports exporting any kind of symbol, and supports groups. Using a module
1027
+ like Exporter or Sub::Exporter, it's easy to look at one place and see what is
1028
+ exported, but it's impossible to look at a variable definition and see whether
1029
+ it is exported by that alone. Exporter::Simple makes this trade in reverse:
1030
+ each variable's declaration includes its export definition, but there is no one
1031
+ place to look to find a manifest of exports.
1032
+
1033
+ More importantly, Exporter::Simple does not add any new features to those of
1034
+ Exporter. In fact, like Exporter::Easy, it is just a front-end to Exporter, so
1035
+ it ends up storing its configuration in global package variables. (This means
1036
+ that there is one place to look for your exporter's manifest, actually. You
1037
+ can inspect the C<@EXPORT> package variables, and other related package
1038
+ variables, at runtime.)
1039
+
1040
+ L<Perl6::Export> isn't actually attribute based, but looks similar. Its syntax
1041
+ is borrowed from Perl 6, and implemented by a source filter. It is a prototype
1042
+ of an interface that is still being designed. It should probably be avoided
1043
+ for production work. On the other hand, L<Perl6::Export::Attrs> implements
1044
+ Perl 6-like exporting, but translates it into Perl 5 by providing attributes.
1045
+
1046
+ =item * Other Exporters
1047
+
1048
+ L<Exporter::Renaming> wraps the standard Exporter to allow it to export symbols
1049
+ with changed names.
1050
+
1051
+ L<Class::Exporter> performs a special kind of routine generation, giving each
1052
+ importing package an instance of your class, and then exporting the instance's
1053
+ methods as normal routines. (Sub::Exporter, of course, can easily emulate this
1054
+ behavior, as shown above.)
1055
+
1056
+ L<Exporter::Tidy> implements a form of renaming (using its C<_map> argument)
1057
+ and of prefixing, and implements groups. It also avoids using package
1058
+ variables for its configuration.
1059
+
1060
+ =back
1061
+
1062
+ =head1 TODO
1063
+
1064
+ =over
1065
+
1066
+ =item * write a set of longer, more demonstrative examples
1067
+
1068
+ =item * solidify the "custom exporter" interface (see C<&default_installer>)
1069
+
1070
+ =item * add an "always" group
1071
+
1072
+ =back
1073
+
1074
+ =head1 THANKS
1075
+
1076
+ Hans Dieter Pearcey provided helpful advice while I was writing Sub::Exporter.
1077
+ Ian Langworth and Shawn Sorichetti asked some good questions and helped me
1078
+ improve my documentation quite a bit. Yuval Kogman helped me find a bunch of
1079
+ little problems.
1080
+
1081
+ Thanks, guys!
1082
+
1083
+ =head1 BUGS
1084
+
1085
+ Please report any bugs or feature requests through the web interface at
1086
+ L<http://rt.cpan.org>. I will be notified, and then you'll automatically be
1087
+ notified of progress on your bug as I make changes.
1088
+
1089
+ =head1 AUTHOR
1090
+
1091
+ Ricardo Signes <rjbs@cpan.org>
1092
+
1093
+ =head1 COPYRIGHT AND LICENSE
1094
+
1095
+ This software is copyright (c) 2007 by Ricardo Signes.
1096
+
1097
+ This is free software; you can redistribute it and/or modify it under
1098
+ the same terms as the Perl 5 programming language system itself.
1099
+
1100
+ =cut
1101
+