opener-tokenizer-base 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +148 -0
- data/bin/tokenizer-base +5 -0
- data/bin/tokenizer-de +5 -0
- data/bin/tokenizer-en +5 -0
- data/bin/tokenizer-es +5 -0
- data/bin/tokenizer-fr +5 -0
- data/bin/tokenizer-it +5 -0
- data/bin/tokenizer-nl +5 -0
- data/core/lib/Data/OptList.pm +256 -0
- data/core/lib/Params/Util.pm +866 -0
- data/core/lib/Sub/Exporter.pm +1101 -0
- data/core/lib/Sub/Exporter/Cookbook.pod +309 -0
- data/core/lib/Sub/Exporter/Tutorial.pod +280 -0
- data/core/lib/Sub/Exporter/Util.pm +354 -0
- data/core/lib/Sub/Install.pm +329 -0
- data/core/lib/Time/Stamp.pm +808 -0
- data/core/load-prefixes.pl +43 -0
- data/core/nonbreaking_prefixes/abbreviation_list.kaf +0 -0
- data/core/nonbreaking_prefixes/abbreviation_list.txt +444 -0
- data/core/nonbreaking_prefixes/nonbreaking_prefix.ca +533 -0
- data/core/nonbreaking_prefixes/nonbreaking_prefix.de +781 -0
- data/core/nonbreaking_prefixes/nonbreaking_prefix.el +448 -0
- data/core/nonbreaking_prefixes/nonbreaking_prefix.en +564 -0
- data/core/nonbreaking_prefixes/nonbreaking_prefix.es +758 -0
- data/core/nonbreaking_prefixes/nonbreaking_prefix.fr +1027 -0
- data/core/nonbreaking_prefixes/nonbreaking_prefix.is +697 -0
- data/core/nonbreaking_prefixes/nonbreaking_prefix.it +641 -0
- data/core/nonbreaking_prefixes/nonbreaking_prefix.nl +739 -0
- data/core/nonbreaking_prefixes/nonbreaking_prefix.pl +729 -0
- data/core/nonbreaking_prefixes/nonbreaking_prefix.pt +656 -0
- data/core/nonbreaking_prefixes/nonbreaking_prefix.ro +484 -0
- data/core/nonbreaking_prefixes/nonbreaking_prefix.ru +705 -0
- data/core/nonbreaking_prefixes/nonbreaking_prefix.sk +920 -0
- data/core/nonbreaking_prefixes/nonbreaking_prefix.sl +524 -0
- data/core/nonbreaking_prefixes/nonbreaking_prefix.sv +492 -0
- data/core/split-sentences.pl +114 -0
- data/core/text-fixer.pl +169 -0
- data/core/tokenizer-cli.pl +363 -0
- data/core/tokenizer.pl +145 -0
- data/lib/opener/tokenizers/base.rb +84 -0
- data/lib/opener/tokenizers/base/version.rb +8 -0
- data/opener-tokenizer-base.gemspec +25 -0
- metadata +134 -0
@@ -0,0 +1,808 @@
|
|
1
|
+
# vim: set sw=2 sts=2 ts=2 expandtab smarttab:
|
2
|
+
#
|
3
|
+
# This file is part of Time-Stamp
|
4
|
+
#
|
5
|
+
# This software is copyright (c) 2011 by Randy Stauner.
|
6
|
+
#
|
7
|
+
# This is free software; you can redistribute it and/or modify it under
|
8
|
+
# the same terms as the Perl 5 programming language system itself.
|
9
|
+
#
|
10
|
+
use strict;
|
11
|
+
use warnings;
|
12
|
+
|
13
|
+
package Time::Stamp;
|
14
|
+
{
|
15
|
+
$Time::Stamp::VERSION = '1.200';
|
16
|
+
}
|
17
|
+
BEGIN {
|
18
|
+
$Time::Stamp::AUTHORITY = 'cpan:RWSTAUNER';
|
19
|
+
}
|
20
|
+
# ABSTRACT: Easy, readable, efficient timestamp functions
|
21
|
+
|
22
|
+
# TODO: use collector?
|
23
|
+
|
24
|
+
use Sub::Exporter 0.982 -setup => {
|
25
|
+
-as => 'do_import',
|
26
|
+
exports => [
|
27
|
+
localstamp => \'_build_localstamp',
|
28
|
+
gmstamp => \'_build_gmstamp',
|
29
|
+
parsegm => \'_build_parsestamp',
|
30
|
+
parselocal => \'_build_parsestamp',
|
31
|
+
],
|
32
|
+
groups => [
|
33
|
+
stamps => [qw(localstamp gmstamp)],
|
34
|
+
parsers => [qw(parselocal parsegm)],
|
35
|
+
]
|
36
|
+
};
|
37
|
+
|
38
|
+
sub import {
|
39
|
+
@_ = map {
|
40
|
+
/(local|gm)(?:stamp)?((?:-\w+)+)/
|
41
|
+
? ($1.'stamp' => {
|
42
|
+
map {
|
43
|
+
/^([um]s)$/ ? ($1 => 1)
|
44
|
+
: (format => $_)
|
45
|
+
}
|
46
|
+
grep { $_ }
|
47
|
+
split(/-/, $2)
|
48
|
+
})
|
49
|
+
: $_
|
50
|
+
} @_;
|
51
|
+
goto &do_import;
|
52
|
+
}
|
53
|
+
|
54
|
+
# set up named formats with default values
|
55
|
+
my $formats = do {
|
56
|
+
# should we offer { prefix => '', suffix => '' } ? is that really useful?
|
57
|
+
# the stamps are easy enough to parse as is (the whole point of this module)
|
58
|
+
my %default = (
|
59
|
+
date_sep => '-',
|
60
|
+
dt_sep => 'T', # ISO 8601
|
61
|
+
time_sep => ':',
|
62
|
+
tz_sep => '',
|
63
|
+
tz => '',
|
64
|
+
);
|
65
|
+
my %blank = map { $_ => '' } keys %default;
|
66
|
+
my $n = {
|
67
|
+
default => {%default},
|
68
|
+
easy => {%default, dt_sep => ' ', tz_sep => ' '}, # easier to read
|
69
|
+
numeric => {%blank},
|
70
|
+
compact => {
|
71
|
+
%blank,
|
72
|
+
dt_sep => '_', # visual separation
|
73
|
+
},
|
74
|
+
};
|
75
|
+
# aliases
|
76
|
+
$n->{$_} = $n->{default} for qw(iso8601 rfc3339 w3cdtf);
|
77
|
+
$n;
|
78
|
+
};
|
79
|
+
|
80
|
+
# we could offer a separate format_time_array() but currently
|
81
|
+
# I think the gain would be less than the cost of the extra function call:
|
82
|
+
# sub _build { return sub { format_time_array($arg, @_ or localtime) }; }
|
83
|
+
# sub format_time_array { sprintf(_format(shift), _ymdhms(@_)) }
|
84
|
+
|
85
|
+
sub _build_localstamp {
|
86
|
+
##my ( $class, $name, $arg, $col ) = @_;
|
87
|
+
my ( undef, undef, $arg, undef ) = @_;
|
88
|
+
|
89
|
+
return _generate_code(local => $arg);
|
90
|
+
}
|
91
|
+
|
92
|
+
sub _build_gmstamp {
|
93
|
+
##my ( $class, $name, $arg, $col ) = @_;
|
94
|
+
my ( undef, undef, $arg, undef ) = @_;
|
95
|
+
|
96
|
+
# add the Z for UTC (Zulu) time zone unless the numeric format is requested
|
97
|
+
$arg = {tz => 'Z', %$arg}
|
98
|
+
unless $arg->{format} && $arg->{format} eq 'numeric';
|
99
|
+
|
100
|
+
return _generate_code(gm => $arg);
|
101
|
+
}
|
102
|
+
|
103
|
+
# TODO: could these subs be faster with a no_args option? would only save 2 if's
|
104
|
+
sub _generate_code {
|
105
|
+
my ($which, $arg) = @_;
|
106
|
+
$arg = { %$arg };
|
107
|
+
# note: mu is 03BC
|
108
|
+
$arg->{frac} ||= $arg->{us} ? 6 : $arg->{ms} ? 3 : 0;
|
109
|
+
|
110
|
+
my $format = _format($arg);
|
111
|
+
|
112
|
+
my $code;
|
113
|
+
my $vars = {
|
114
|
+
which => $which,
|
115
|
+
};
|
116
|
+
if( $arg->{frac} ){
|
117
|
+
$vars->{frac} = $arg->{frac};
|
118
|
+
# always display a fraction if requested
|
119
|
+
$vars->{gettime} = _have_hires()
|
120
|
+
? 'Time::HiRes::gettimeofday()'
|
121
|
+
# if HiRes fails to load use whole number precision
|
122
|
+
: '(CORE::time(), 0)';
|
123
|
+
$code = <<'CODE';
|
124
|
+
sub {
|
125
|
+
# localtime() will not preserve the fraction, so separate it
|
126
|
+
my ($t, $f) = @_ ? (split(/\./, $_[0]), 0) : {{gettime}};
|
127
|
+
my @lt = _ymdhms(@_ > 1 ? @_ : CORE::{{which}}time($t));
|
128
|
+
|
129
|
+
# use %.6f for precision, but strip leading zero
|
130
|
+
return sprintf($format, @lt, substr(sprintf('%.{{frac}}f', '.'.$f), 1));
|
131
|
+
};
|
132
|
+
CODE
|
133
|
+
}
|
134
|
+
# if not using fraction return a more efficient sub
|
135
|
+
else {
|
136
|
+
$code = <<'CODE';
|
137
|
+
sub {
|
138
|
+
return sprintf($format,
|
139
|
+
_ymdhms(@_ > 1 ? @_ : CORE::{{which}}time(@_ ? $_[0] : time))
|
140
|
+
);
|
141
|
+
};
|
142
|
+
CODE
|
143
|
+
}
|
144
|
+
# poor man's template (easier than sprintf or escaping sigils)
|
145
|
+
$code =~ s/\{\{(\w+)\}\}/$vars->{$1}/g;
|
146
|
+
|
147
|
+
return do { eval $code or die $@ }; ## no critic (StringyEval)
|
148
|
+
}
|
149
|
+
|
150
|
+
sub _build_parsestamp {
|
151
|
+
##my ($class, $name, $arg, $col) = @_;
|
152
|
+
my ( undef, $name, $arg, undef ) = @_;
|
153
|
+
|
154
|
+
# pre-compile the regexp
|
155
|
+
my $regexp = exists $arg->{regexp}
|
156
|
+
? qr/$arg->{regexp}/
|
157
|
+
: qr/^ (\d{4}) \D* (\d{2}) \D* (\d{2}) \D*
|
158
|
+
(\d{2}) \D* (\d{2}) \D* (\d{2}) (?:\.(\d+))? .* $/x;
|
159
|
+
|
160
|
+
require Time::Local; # core
|
161
|
+
my $time = $name eq 'parsegm'
|
162
|
+
? \&Time::Local::timegm
|
163
|
+
: \&Time::Local::timelocal;
|
164
|
+
|
165
|
+
return sub {
|
166
|
+
my ($stamp) = @_;
|
167
|
+
my ($frac, @time) = reverse ($stamp =~ $regexp);
|
168
|
+
|
169
|
+
# if the regexp didn't match (empty list) give up now
|
170
|
+
return
|
171
|
+
if !@time;
|
172
|
+
|
173
|
+
# regexp didn't have 7th capture group (for fraction)
|
174
|
+
if( @time < 6 ){
|
175
|
+
unshift @time, $frac;
|
176
|
+
# if there was a fraction in group 6 separate it
|
177
|
+
# or timelocal may produce something unexpected.
|
178
|
+
# if there was no fraction $frac will be undef
|
179
|
+
($time[0], $frac) = split(/\./, $time[0]);
|
180
|
+
}
|
181
|
+
|
182
|
+
# coerce strings into numbers (map { int } would not work for fractions)
|
183
|
+
@time = map { $_ + 0 } @time;
|
184
|
+
|
185
|
+
$time[5] -= 1900; # year
|
186
|
+
$time[4] -= 1; # month
|
187
|
+
|
188
|
+
# make sure it starts with a dot (whether it has one or not)
|
189
|
+
$frac =~ s/^0?\.?/./
|
190
|
+
if defined $frac;
|
191
|
+
|
192
|
+
if( wantarray ){
|
193
|
+
$time[0] .= $frac
|
194
|
+
if defined $frac;
|
195
|
+
return @time;
|
196
|
+
}
|
197
|
+
else {
|
198
|
+
my $ts = &$time(@time);
|
199
|
+
$ts .= $frac
|
200
|
+
if defined $frac;
|
201
|
+
return $ts;
|
202
|
+
}
|
203
|
+
};
|
204
|
+
}
|
205
|
+
|
206
|
+
sub _format {
|
207
|
+
my ($arg) = @_;
|
208
|
+
|
209
|
+
my $name = $arg->{format} || ''; # avoid undef
|
210
|
+
# we could return $arg->{format} unless exists $formats->{$name}; warn if no % found?
|
211
|
+
# or just return $arg->{sprintf} if exists $arg->{sprintf};
|
212
|
+
$name = 'default'
|
213
|
+
unless exists $formats->{$name};
|
214
|
+
|
215
|
+
# start with named format, overwrite with any explicitly specified options
|
216
|
+
my %opt = (%{ $formats->{$name} }, %$arg);
|
217
|
+
|
218
|
+
# TODO: $opt{tz} = tz_offset() if $opt{guess_tz};
|
219
|
+
|
220
|
+
# sadly "%02.6f" does not zero-pad the integer portion, so we have to be trickier
|
221
|
+
|
222
|
+
return
|
223
|
+
join($opt{date_sep}, qw(%04d %02d %02d)) .
|
224
|
+
$opt{dt_sep} .
|
225
|
+
join($opt{time_sep}, qw(%02d %02d %02d)) .
|
226
|
+
($opt{frac} ? '%s' : '') .
|
227
|
+
($opt{tz} ? $opt{tz_sep} . $opt{tz} : '')
|
228
|
+
;
|
229
|
+
}
|
230
|
+
|
231
|
+
# convert *time() arrays to something ready to send to sprintf
|
232
|
+
sub _ymdhms {
|
233
|
+
return ($_[5] + 1900, $_[4] + 1, @_[3, 2, 1, 0]);
|
234
|
+
}
|
235
|
+
|
236
|
+
my $_have_hires;
|
237
|
+
sub _have_hires {
|
238
|
+
if( !defined($_have_hires) ){
|
239
|
+
local $@;
|
240
|
+
$_have_hires = eval { require Time::HiRes; 1 } || do {
|
241
|
+
warn "Time::HiRes requested but failed to load: $@";
|
242
|
+
0;
|
243
|
+
};
|
244
|
+
}
|
245
|
+
return $_have_hires;
|
246
|
+
}
|
247
|
+
|
248
|
+
# define default localstamp and gmstamp in this package
|
249
|
+
# so that exporting is not strictly required
|
250
|
+
__PACKAGE__->import(qw(
|
251
|
+
localstamp
|
252
|
+
gmstamp
|
253
|
+
parsegm
|
254
|
+
parselocal
|
255
|
+
));
|
256
|
+
|
257
|
+
1;
|
258
|
+
|
259
|
+
|
260
|
+
__END__
|
261
|
+
=pod
|
262
|
+
|
263
|
+
=for :stopwords Randy Stauner ACKNOWLEDGEMENTS TODO timestamp gmstamp localstamp UTC
|
264
|
+
parsegm parselocal 6th 7th cpan testmatrix url annocpan anno bugtracker rt
|
265
|
+
cpants kwalitee diff irc mailto metadata placeholders metacpan
|
266
|
+
|
267
|
+
=encoding utf-8
|
268
|
+
|
269
|
+
=head1 NAME
|
270
|
+
|
271
|
+
Time::Stamp - Easy, readable, efficient timestamp functions
|
272
|
+
|
273
|
+
=head1 VERSION
|
274
|
+
|
275
|
+
version 1.200
|
276
|
+
|
277
|
+
=head1 SYNOPSIS
|
278
|
+
|
279
|
+
# import customized functions to make easy-to-use timestamps
|
280
|
+
|
281
|
+
use Time::Stamp 'gmstamp';
|
282
|
+
my $now = gmstamp();
|
283
|
+
my $mtime = gmstamp( (stat($file))[9] );
|
284
|
+
# $mtime is something like "2012-05-18T10:52:32Z"
|
285
|
+
|
286
|
+
|
287
|
+
use Time::Stamp localstamp => { -as => 'ltime', format => 'compact' };
|
288
|
+
# ltime() will return something like "20120518_105232"
|
289
|
+
|
290
|
+
use Time::Stamp -stamps => { dt_sep => ' ', date_sep => '/', us => 1 };
|
291
|
+
# localstamp() will return something like "2012/05/18 10:52:32.123456"
|
292
|
+
|
293
|
+
|
294
|
+
# inverse functions to parse the stamps
|
295
|
+
|
296
|
+
use Time::Stamp 'parsegm';
|
297
|
+
my $seconds = parsegm($stamp);
|
298
|
+
|
299
|
+
use Time::Stamp parselocal => { -as => 'parsel', regexp => qr/$pattern/ };
|
300
|
+
|
301
|
+
use Time::Stamp -parsers => { regexp => qr/$pattern/ };
|
302
|
+
|
303
|
+
|
304
|
+
# the default configurations of each function
|
305
|
+
# are available without importing into your namespace
|
306
|
+
|
307
|
+
$stamp = Time::Stamp::gmstamp($time);
|
308
|
+
$time = Time::Stamp::parsegm($stamp);
|
309
|
+
|
310
|
+
|
311
|
+
# use shortcuts for specifying desired format, useful for one-liners:
|
312
|
+
qx/perl -MTime::Stamp=local-compact -E 'say localstamp'/;
|
313
|
+
# with milliseconds:
|
314
|
+
qx/perl -MTime::Stamp=local-compact-ms -E 'say localstamp'/;
|
315
|
+
# with microseconds:
|
316
|
+
qx/perl -MTime::Stamp=local-compact-us -E 'say localstamp'/;
|
317
|
+
|
318
|
+
=head1 DESCRIPTION
|
319
|
+
|
320
|
+
This module makes it easy to include timestamp functions
|
321
|
+
that are simple, easy to read, easy to parse, and fast.
|
322
|
+
For simple timestamps perl's built-in functions are all you need:
|
323
|
+
L<time|perlfunc/time>,
|
324
|
+
L<gmtime|perlfunc/gmtime> (or L<localtime|perlfunc/localtime>),
|
325
|
+
and L<sprintf|perlfunc/sprintf>...
|
326
|
+
|
327
|
+
Sometimes you desire a simple timestamp to add to a file name
|
328
|
+
or use as part of a generated data identifier.
|
329
|
+
The fastest and easiest thing to do is call L<time()|perlfunc/time>
|
330
|
+
to get a seconds-since-epoch integer.
|
331
|
+
|
332
|
+
Sometimes you get a seconds-since-epoch integer from another function
|
333
|
+
(like L<stat()|perlfunc/stat> for instance)
|
334
|
+
and maybe you want to store that in a database or send it across the network.
|
335
|
+
|
336
|
+
This integer timestamp works for these purposes,
|
337
|
+
but it's not easy to read.
|
338
|
+
|
339
|
+
If you're looking at a list of timestamps you have to fire up a perl
|
340
|
+
interpreter and copy and paste the timestamp into
|
341
|
+
L<localtime()|perlfunc/localtime> to figure out when that actually was.
|
342
|
+
|
343
|
+
You can pass the timestamp to C<scalar localtime($sec)>
|
344
|
+
(or C<scalar gmtime($sec)>)
|
345
|
+
but that doesn't sort well or parse easily,
|
346
|
+
isn't internationally friendly,
|
347
|
+
and contains characters that aren't friendly for file names or URIs
|
348
|
+
(or other places you may want to use it).
|
349
|
+
|
350
|
+
See L<perlport/Time and Date> for more discussion on useful timestamps.
|
351
|
+
|
352
|
+
For simple timestamps you can get the data you need from
|
353
|
+
L<localtime|perlfunc/localtime> and L<gmtime|perlfunc/gmtime>
|
354
|
+
without incurring the resource cost of L<DateTime>
|
355
|
+
(or any other object for that matter).
|
356
|
+
|
357
|
+
So the aim of this module is to provide simple timestamp functions
|
358
|
+
so that you can have easy-to-use, easy-to-read timestamps efficiently.
|
359
|
+
|
360
|
+
=for test_synopsis my ( $file, $pattern, $stamp, $time );
|
361
|
+
|
362
|
+
=head1 FORMAT
|
363
|
+
|
364
|
+
For reasons listed elsewhere
|
365
|
+
the timestamps are always in order from largest unit to smallest:
|
366
|
+
year, month, day, hours, minutes, seconds
|
367
|
+
and each component is always two digits, except the year, which is always four.
|
368
|
+
|
369
|
+
The other characters of the stamp are configurable:
|
370
|
+
|
371
|
+
=over 4
|
372
|
+
|
373
|
+
=item *
|
374
|
+
|
375
|
+
C<date_sep> - Character separating date components; Default: C<'-'>
|
376
|
+
|
377
|
+
=item *
|
378
|
+
|
379
|
+
C<dt_sep> - Character separating date and time; Default: C<'T'>
|
380
|
+
|
381
|
+
=item *
|
382
|
+
|
383
|
+
C<time_sep> - Character separating time components; Default: C<':'>
|
384
|
+
|
385
|
+
=item *
|
386
|
+
|
387
|
+
C<tz_sep> - Character separating time and timezone; Default: C<''>
|
388
|
+
|
389
|
+
=item *
|
390
|
+
|
391
|
+
C<tz> - Time zone designator; Default: C<''>
|
392
|
+
|
393
|
+
=item *
|
394
|
+
|
395
|
+
C<frac> - Digits of fractional seconds to show; Default: no fraction
|
396
|
+
|
397
|
+
=item *
|
398
|
+
|
399
|
+
C<ms> - Boolean shortcut: milliseconds; If true, same as C<< frac => 3 >>
|
400
|
+
|
401
|
+
=item *
|
402
|
+
|
403
|
+
C<us> - Boolean shortcut: microseconds; If true, same as C<< frac => 6 >>
|
404
|
+
|
405
|
+
=back
|
406
|
+
|
407
|
+
The following formats are predefined:
|
408
|
+
|
409
|
+
default => see above descriptions
|
410
|
+
iso8601 => \%default
|
411
|
+
rfc3339 => \%default
|
412
|
+
w3cdtf => \%default
|
413
|
+
"2010-01-02T13:14:15" # local
|
414
|
+
"2010-01-02T13:14:15Z" # gm
|
415
|
+
|
416
|
+
easy => like default but with a space as dt_sep and tz_sep (easier to read)
|
417
|
+
"2010-01-02 13:14:15" # local
|
418
|
+
"2010-01-02 13:14:15 Z" # gm
|
419
|
+
|
420
|
+
compact => condense date and time components and set dt_sep to '_'
|
421
|
+
"20100102_131415" # local
|
422
|
+
"20100102_131415Z" # gm
|
423
|
+
|
424
|
+
numeric => all options are '' so that only numbers remain
|
425
|
+
"20100102131415" # both
|
426
|
+
|
427
|
+
Currently there is no attempt to guess the time zone.
|
428
|
+
By default C<gmstamp> sets C<tz> to C<'Z'> (which you can override if desired).
|
429
|
+
If you are using C<gmstamp> (recommended for transmitting to another computer)
|
430
|
+
you don't need anything else. If you are using C<localstamp> you are probably
|
431
|
+
keeping the timestamp on that computer (like the stamp in a log file)
|
432
|
+
and you probably aren't concerned with time zone since it isn't likely to change.
|
433
|
+
|
434
|
+
If you want to include a time zone (other than C<'Z'> for UTC)
|
435
|
+
the standards suggest using the offset value (like C<-0700> or C<+12:00>).
|
436
|
+
If you would like to determine the time zone offset you can do something like:
|
437
|
+
|
438
|
+
use Time::Zone (); # or Time::Timezone
|
439
|
+
use Time::Stamp localstamp => { tz => Time::Zone::tz_offset() };
|
440
|
+
|
441
|
+
If, despite the recommendations, you want to use the local time zone code:
|
442
|
+
|
443
|
+
use POSIX (); # included in perl core
|
444
|
+
use Time::Stamp localstamp => { tz => POSIX::strftime('%Z', localtime) };
|
445
|
+
|
446
|
+
These options are not included in this module since they are not recommended
|
447
|
+
and introduce unnecessary overhead (loading the aforementioned modules).
|
448
|
+
|
449
|
+
=head1 EXPORTS
|
450
|
+
|
451
|
+
This module uses L<Sub::Exporter>
|
452
|
+
to enable you to customize your timestamp function
|
453
|
+
but still create it as easily as possible.
|
454
|
+
|
455
|
+
The customizations are done at import
|
456
|
+
and stored in the custom function returned
|
457
|
+
to make the resulting function as fast as possible.
|
458
|
+
|
459
|
+
The following groups and functions are available for export
|
460
|
+
(nothing is exported by default):
|
461
|
+
|
462
|
+
=head2 -stamps
|
463
|
+
|
464
|
+
This is a convenience group for importing both L</gmstamp> and L</localstamp>.
|
465
|
+
|
466
|
+
Each timestamp export accepts any of the keys listed in L</FORMAT>
|
467
|
+
as well as C<format> which can be the name of a predefined format.
|
468
|
+
|
469
|
+
use Time::Stamp '-stamps';
|
470
|
+
use Time::Stamp -stamps => { format => 'compact' };
|
471
|
+
|
472
|
+
use Time::Stamp gmstamp => { dt_sep => ' ', tz => ' UTC' };
|
473
|
+
|
474
|
+
use Time::Stamp localstamp => { -as => 'shorttime', format => 'compact' };
|
475
|
+
|
476
|
+
Each timestamp function will return a string according to the time as follows:
|
477
|
+
|
478
|
+
=over 4
|
479
|
+
|
480
|
+
=item *
|
481
|
+
|
482
|
+
If called with no arguments C<time()> (I<now>) will be used
|
483
|
+
|
484
|
+
(or L<Time::HiRes/gettimeofday> for fractional seconds).
|
485
|
+
|
486
|
+
=item *
|
487
|
+
|
488
|
+
A single argument should be an integer
|
489
|
+
(like that returned from C<time()> or C<stat()>).
|
490
|
+
|
491
|
+
If a floating point number is provided
|
492
|
+
(and fractional seconds were part of the format)
|
493
|
+
the fraction will be preserved (according to the specified precision).
|
494
|
+
|
495
|
+
=item *
|
496
|
+
|
497
|
+
More than one argument is assumed to be the list returned from
|
498
|
+
C<gmtime()> or C<localtime()> which can be useful if you previously called
|
499
|
+
the function and don't want to do it again.
|
500
|
+
|
501
|
+
If the first argument (seconds) is a floating point number
|
502
|
+
(and fractional seconds were part of the format)
|
503
|
+
the fraction will be preserved (according to the specified precision).
|
504
|
+
|
505
|
+
=back
|
506
|
+
|
507
|
+
Most commonly the 0 or 1 argument form would be used,
|
508
|
+
but the shortcut of using a time array is provided
|
509
|
+
in case you already have the array so that you don't have to use
|
510
|
+
L<Time::Local> just to get the integer back.
|
511
|
+
|
512
|
+
=head2 gmstamp
|
513
|
+
|
514
|
+
$stamp = gmstamp(); # equivalent to gmstamp(time())
|
515
|
+
$stamp = gmstamp($seconds);
|
516
|
+
$stamp = gmstamp(@gmtime);
|
517
|
+
|
518
|
+
This returns a string according to the format specified in the import call.
|
519
|
+
|
520
|
+
By default this function sets C<tz> to C<'Z'>
|
521
|
+
since C<gmtime()> returns values in C<UTC> (no time zone offset).
|
522
|
+
|
523
|
+
This is the recommended stamp as it is by default unambiguous
|
524
|
+
and useful for transmitting to another computer.
|
525
|
+
|
526
|
+
=head2 localstamp
|
527
|
+
|
528
|
+
$stamp = localstamp(); # equivalent to localstamp(time())
|
529
|
+
$stamp = localstamp($seconds);
|
530
|
+
$stamp = localstamp(@localtime);
|
531
|
+
|
532
|
+
This returns a string according to the format specified in the import call.
|
533
|
+
|
534
|
+
By default this function does not include a time zone indicator.
|
535
|
+
|
536
|
+
This function can be useful for log files or other values that stay
|
537
|
+
on the machine where time zone is not important and/or is constant.
|
538
|
+
|
539
|
+
=head2 -parsers
|
540
|
+
|
541
|
+
This is a convenience group for importing both L</parsegm> and L</parselocal>.
|
542
|
+
|
543
|
+
use Time::Stamp '-parsers';
|
544
|
+
use Time::Stamp -parsers => { regexp => qr/pattern/ };
|
545
|
+
|
546
|
+
use Time::Stamp 'parsegm';
|
547
|
+
|
548
|
+
use Time::Stamp parselocal => { -as => 'parsestamp', regexp => qr/pattern/ };
|
549
|
+
|
550
|
+
The parser functions are the inverse of the stamp functions.
|
551
|
+
They accept a timestamp and use the appropriate function from L<Time::Local>
|
552
|
+
to turn it back into a seconds-since-epoch integer.
|
553
|
+
|
554
|
+
In list context they return the list that would have been sent to L<Time::Local>
|
555
|
+
which is similar to the one returned by
|
556
|
+
L<gmtime|perlfunc/gmtime> and L<localtime|perlfunc/localtime>:
|
557
|
+
seconds, minutes, hours, day, month (0-11), year (-1900).
|
558
|
+
B<NOTE> that the C<wday>, C<yday>, and C<isdst> parameters
|
559
|
+
(the last three elements returned from C<localtime> or C<gmtime>)
|
560
|
+
are not returned because they are not easily determined from the stamp.
|
561
|
+
Besides, L<Time::Local> only takes the first 6 anyway.
|
562
|
+
|
563
|
+
If the stamp doesn't match the pattern
|
564
|
+
the function will return undef in scalar context
|
565
|
+
or an empty list in list context.
|
566
|
+
|
567
|
+
An alternate regular expression can be supplied as the C<regexp> parameter
|
568
|
+
during import. The default pattern will match any of the named formats.
|
569
|
+
|
570
|
+
The pattern must capture 6 groups in the appropriate order:
|
571
|
+
year, month, day, hour, minute, second.
|
572
|
+
If you're doing something more complex you probably ought to be using
|
573
|
+
one of the modules listed in L</SEE ALSO>.
|
574
|
+
|
575
|
+
An optional 7th group can be used to capture the fractional seconds.
|
576
|
+
If only 6 groups are used, the 6th capture (seconds)
|
577
|
+
will be checked for a fraction.
|
578
|
+
The fraction will be separated from the whole number
|
579
|
+
before being passed through the L<Time::Local> functions
|
580
|
+
then appended to the result
|
581
|
+
(the number returned in scalar context,
|
582
|
+
or to the first element returned in list context)
|
583
|
+
in an attempt to provide the most expected/reliable result.
|
584
|
+
|
585
|
+
=head2 parsegm
|
586
|
+
|
587
|
+
$seconds = parsegm($stamp);
|
588
|
+
@gmtime = parsegm($stamp);
|
589
|
+
|
590
|
+
This is the inverse of L</gmstamp>.
|
591
|
+
It parses a timestamp (like the ones created by this module) and uses
|
592
|
+
L<Time::Local/timegm> to turn it back into a seconds-since-epoch integer.
|
593
|
+
|
594
|
+
=head2 parselocal
|
595
|
+
|
596
|
+
$seconds = parselocal($stamp);
|
597
|
+
@localtime = parselocal($stamp);
|
598
|
+
|
599
|
+
This is the inverse of L</localstamp>.
|
600
|
+
It parses a timestamp (like the ones created by this module) and uses
|
601
|
+
L<Time::Local/timelocal> to turn it back into a seconds-since-epoch integer.
|
602
|
+
|
603
|
+
=head2 SHORTCUTS
|
604
|
+
|
605
|
+
There are also shortcuts available in the format of C<< type-format >>
|
606
|
+
that export the appropriate function using the named format.
|
607
|
+
|
608
|
+
For example:
|
609
|
+
|
610
|
+
=over 4
|
611
|
+
|
612
|
+
=item *
|
613
|
+
|
614
|
+
C<local-compact> exports a L</localstamp> function using the C<compact> format
|
615
|
+
|
616
|
+
=item *
|
617
|
+
|
618
|
+
C<gm-easy> exports a L</gmstamp> function using the C<easy> format
|
619
|
+
|
620
|
+
=back
|
621
|
+
|
622
|
+
This makes the module easier to use on the command line:
|
623
|
+
|
624
|
+
perl -MTime::Stamp=local-compact -E 'say localstamp'
|
625
|
+
|
626
|
+
Rather than:
|
627
|
+
|
628
|
+
perl -E 'use Time::Stamp localstamp => { format => "compact" }; say localstamp'
|
629
|
+
|
630
|
+
Any of the predefined formats named in L</FORMAT>
|
631
|
+
can be used in the shortcut notation.
|
632
|
+
|
633
|
+
Additionally recognized flags include:
|
634
|
+
|
635
|
+
=over 4
|
636
|
+
|
637
|
+
=item *
|
638
|
+
|
639
|
+
C<us> adds microseconds (6 digit precision): C<< local-easy-us >>
|
640
|
+
|
641
|
+
=item *
|
642
|
+
|
643
|
+
C<ms> adds milliseconds (3 digit precision): C<< gm-ms >>
|
644
|
+
|
645
|
+
=back
|
646
|
+
|
647
|
+
=head1 SEE ALSO
|
648
|
+
|
649
|
+
=over 4
|
650
|
+
|
651
|
+
=item *
|
652
|
+
|
653
|
+
L<perlport/Time and Date> - discussion on using portable, readable timestamps
|
654
|
+
|
655
|
+
=item *
|
656
|
+
|
657
|
+
L<perlfunc/localtime> - built-in function
|
658
|
+
|
659
|
+
=item *
|
660
|
+
|
661
|
+
L<perlfunc/gmtime> - built-in function
|
662
|
+
|
663
|
+
=item *
|
664
|
+
|
665
|
+
L<Timestamp::Simple> - small, less efficient, non-customizable stamp
|
666
|
+
|
667
|
+
=item *
|
668
|
+
|
669
|
+
L<Time::Piece> - object-oriented module for working with times
|
670
|
+
|
671
|
+
=item *
|
672
|
+
|
673
|
+
L<DateTime::Tiny> - object-oriented module "with as little code as possible"
|
674
|
+
|
675
|
+
=item *
|
676
|
+
|
677
|
+
L<DateTime> - large, powerful object-oriented system
|
678
|
+
|
679
|
+
=item *
|
680
|
+
|
681
|
+
L<Time::localtime> - small object-oriented/named interface to C<localtime()>
|
682
|
+
|
683
|
+
=item *
|
684
|
+
|
685
|
+
L<Time::gmtime> - small object-oriented/named interface to C<gmtime()>
|
686
|
+
|
687
|
+
=item *
|
688
|
+
|
689
|
+
L<POSIX> - large module containing standard methods including C<strftime()>
|
690
|
+
|
691
|
+
=item *
|
692
|
+
|
693
|
+
L<http://www.cl.cam.ac.uk/~mgk25/iso-time.html> - summary of C<ISO 8601>
|
694
|
+
|
695
|
+
=item *
|
696
|
+
|
697
|
+
L<http://www.w3.org/TR/NOTE-datetime> - C<W3CDTF> profile of C<ISO 8601>
|
698
|
+
|
699
|
+
=item *
|
700
|
+
|
701
|
+
L<http://www.ietf.org/rfc/rfc3339.txt> - C<RFC3339> profile of C<ISO 8601>
|
702
|
+
|
703
|
+
=back
|
704
|
+
|
705
|
+
=head1 TODO
|
706
|
+
|
707
|
+
=over 4
|
708
|
+
|
709
|
+
=item *
|
710
|
+
|
711
|
+
Allow an option for overwriting the globals
|
712
|
+
so that calling C<localtime> in scalar context will return
|
713
|
+
a stamp in the desired format.
|
714
|
+
The normal values will be returned in list context.
|
715
|
+
|
716
|
+
=back
|
717
|
+
|
718
|
+
=head1 SUPPORT
|
719
|
+
|
720
|
+
=head2 Perldoc
|
721
|
+
|
722
|
+
You can find documentation for this module with the perldoc command.
|
723
|
+
|
724
|
+
perldoc Time::Stamp
|
725
|
+
|
726
|
+
=head2 Websites
|
727
|
+
|
728
|
+
The following websites have more information about this module, and may be of help to you. As always,
|
729
|
+
in addition to those websites please use your favorite search engine to discover more resources.
|
730
|
+
|
731
|
+
=over 4
|
732
|
+
|
733
|
+
=item *
|
734
|
+
|
735
|
+
Search CPAN
|
736
|
+
|
737
|
+
The default CPAN search engine, useful to view POD in HTML format.
|
738
|
+
|
739
|
+
L<http://search.cpan.org/dist/Time-Stamp>
|
740
|
+
|
741
|
+
=item *
|
742
|
+
|
743
|
+
RT: CPAN's Bug Tracker
|
744
|
+
|
745
|
+
The RT ( Request Tracker ) website is the default bug/issue tracking system for CPAN.
|
746
|
+
|
747
|
+
L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=Time-Stamp>
|
748
|
+
|
749
|
+
=item *
|
750
|
+
|
751
|
+
CPAN Ratings
|
752
|
+
|
753
|
+
The CPAN Ratings is a website that allows community ratings and reviews of Perl modules.
|
754
|
+
|
755
|
+
L<http://cpanratings.perl.org/d/Time-Stamp>
|
756
|
+
|
757
|
+
=item *
|
758
|
+
|
759
|
+
CPAN Testers
|
760
|
+
|
761
|
+
The CPAN Testers is a network of smokers who run automated tests on uploaded CPAN distributions.
|
762
|
+
|
763
|
+
L<http://www.cpantesters.org/distro/T/Time-Stamp>
|
764
|
+
|
765
|
+
=item *
|
766
|
+
|
767
|
+
CPAN Testers Matrix
|
768
|
+
|
769
|
+
The CPAN Testers Matrix is a website that provides a visual overview of the test results for a distribution on various Perls/platforms.
|
770
|
+
|
771
|
+
L<http://matrix.cpantesters.org/?dist=Time-Stamp>
|
772
|
+
|
773
|
+
=item *
|
774
|
+
|
775
|
+
CPAN Testers Dependencies
|
776
|
+
|
777
|
+
The CPAN Testers Dependencies is a website that shows a chart of the test results of all dependencies for a distribution.
|
778
|
+
|
779
|
+
L<http://deps.cpantesters.org/?module=Time::Stamp>
|
780
|
+
|
781
|
+
=back
|
782
|
+
|
783
|
+
=head2 Bugs / Feature Requests
|
784
|
+
|
785
|
+
Please report any bugs or feature requests by email to C<bug-time-stamp at rt.cpan.org>, or through
|
786
|
+
the web interface at L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Time-Stamp>. You will be automatically notified of any
|
787
|
+
progress on the request by the system.
|
788
|
+
|
789
|
+
=head2 Source Code
|
790
|
+
|
791
|
+
|
792
|
+
L<https://github.com/rwstauner/Time-Stamp>
|
793
|
+
|
794
|
+
git clone https://github.com/rwstauner/Time-Stamp.git
|
795
|
+
|
796
|
+
=head1 AUTHOR
|
797
|
+
|
798
|
+
Randy Stauner <rwstauner@cpan.org>
|
799
|
+
|
800
|
+
=head1 COPYRIGHT AND LICENSE
|
801
|
+
|
802
|
+
This software is copyright (c) 2011 by Randy Stauner.
|
803
|
+
|
804
|
+
This is free software; you can redistribute it and/or modify it under
|
805
|
+
the same terms as the Perl 5 programming language system itself.
|
806
|
+
|
807
|
+
=cut
|
808
|
+
|