mme_tools 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.document +5 -0
- data/Gemfile +16 -0
- data/Gemfile.lock +73 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +47 -0
- data/Rakefile +53 -0
- data/examples/demo_args_proc.rb +26 -0
- data/examples/demo_config.rb +53 -0
- data/examples/demo_enumerable.rb +67 -0
- data/examples/demo_print_debug.rb +35 -0
- data/examples/demo_webparse.rb +22 -0
- data/examples/tmp/config.yml +10 -0
- data/lib/mme_tools.rb +18 -0
- data/lib/mme_tools/args_proc.rb +26 -0
- data/lib/mme_tools/concurrent.rb +52 -0
- data/lib/mme_tools/config.rb +121 -0
- data/lib/mme_tools/debug.rb +29 -0
- data/lib/mme_tools/enumerable.rb +88 -0
- data/lib/mme_tools/version.rb +16 -0
- data/lib/mme_tools/webparse.rb +109 -0
- data/mme_tools.gemspec +82 -0
- data/test/test_config.rb +96 -0
- data/test/test_enumerable.rb +32 -0
- data/test/test_webparse.rb +96 -0
- metadata +167 -0
data/lib/mme_tools.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
# mme_tools
|
2
|
+
# Marcel Massana 1-Sep-2011
|
3
|
+
|
4
|
+
$: << File.dirname(__FILE__)
|
5
|
+
|
6
|
+
require 'mme_tools/enumerable'
|
7
|
+
require 'mme_tools/webparse'
|
8
|
+
require 'mme_tools/debug'
|
9
|
+
require 'mme_tools/concurrent'
|
10
|
+
require 'mme_tools/config'
|
11
|
+
require 'mme_tools/args_proc'
|
12
|
+
|
13
|
+
# TODO: translate all documentation to English
|
14
|
+
|
15
|
+
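# Top-level namespace of the gem; the files required above add the Enumerable, Webparse, Debug, Concurrent, Config and ArgsProc members to it.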
|
16
|
+
module MMETools
|
17
|
+
end
|
18
|
+
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# NamedArgs
|
2
|
+
# Marcel Massana 13-Sep-2011
|
3
|
+
#
|
4
|
+
# Extends Hash with methods based on ideas from
|
5
|
+
# http://saaientist.blogspot.com/2007/11/named-arguments-in-ruby.html
|
6
|
+
# Also thanks to ActiveSupport from Rails
|
7
|
+
|
8
|
+
module MMETools

  # Kept for backward compatibility: MMETools extends itself, so any instance
  # method defined directly in MMETools is also callable on the module.
  extend self

  # Helpers to validate option hashes used as named arguments.
  module ArgsProc

    # Makes the helpers callable on the module itself
    # (MMETools::ArgsProc.assert_valid_keys), as the other MMETools modules
    # do. Using the module as a mixin keeps working as before.
    extend self

    # Tests whether +options+ includes only valid keys.
    #
    # +options+::       Hash with the options to validate.
    # +valid_options+:: Hash that must include every accepted key; its values
    #                   are not taken into account.
    #
    # Returns nil when every key of +options+ is accepted.
    # Raises ArgumentError listing the offending keys otherwise.
    def assert_valid_keys(options, valid_options)
      unknown_keys = options.keys - valid_options.keys
      return if unknown_keys.empty?

      raise ArgumentError, "Unknown options(s): #{unknown_keys.join(", ")}"
    end

  end

end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
# mme_tools/concurrent
|
2
|
+
# Marcel Massana 1-Sep-2011
|
3
|
+
|
4
|
+
require 'thread'
|
5
|
+
|
6
|
+
module MMETools
|
7
|
+
|
8
|
+
# Concurrent classes: Hash and Array subclasses with some methods wrapped in a Mutex
|
9
|
+
module Concurrent
|
10
|
+
|
11
|
+
extend self
|
12
|
+
|
13
|
+
# Hash whose element reader ([]) and element writer ([]=) run while holding
# a per-instance Mutex. Every other Hash method is inherited unchanged and
# is not wrapped by the mutex.
class ConcurrentHash < Hash

  # Accepts the same arguments as Hash.new (an optional default value or a
  # default block) and creates the mutex used by [] and []=.
  def initialize(*args)
    super # bare super forwards +args+ and any given block to Hash#initialize
    @mutex = Mutex.new
  end

  # Element lookup performed inside the mutex.
  def [](*args)
    @mutex.synchronize { super }
  end

  # Element assignment performed inside the mutex.
  def []=(*args)
    @mutex.synchronize { super }
  end

end
|
28
|
+
|
29
|
+
# Array whose element reader ([]), element writer ([]=) and append (<<) run
# while holding a per-instance Mutex. Every other Array method is inherited
# unchanged and is not wrapped by the mutex.
class ConcurrentArray < Array

  # Accepts the same arguments as Array.new (size, size and fill value, an
  # array to copy, or size plus a block) and creates the mutex used by the
  # wrapped methods.
  def initialize(*args)
    super # bare super forwards +args+ and any given block to Array#initialize
    @mutex = Mutex.new
  end

  # Element or slice lookup performed inside the mutex.
  def [](*args)
    @mutex.synchronize { super }
  end

  # Element or slice assignment performed inside the mutex.
  def []=(*args)
    @mutex.synchronize { super }
  end

  # Append performed inside the mutex.
  def <<(*args)
    @mutex.synchronize { super }
  end

end
|
49
|
+
|
50
|
+
end
|
51
|
+
|
52
|
+
end
|
@@ -0,0 +1,121 @@
|
|
1
|
+
# config
|
2
|
+
# Marcel Massana 11-Sep-2011
|
3
|
+
#
|
4
|
+
# A configuration class to tidy up setup of applications
|
5
|
+
# http://mjijackson.com/2010/02/flexible-ruby-config-objects
|
6
|
+
|
7
|
+
require 'yaml'
|
8
|
+
|
9
|
+
module MMETools
|
10
|
+
|
11
|
+
# TODO: try to overcome name conflicts with existing methods (like dump) and with uppercase keys
|
12
|
+
# TODO: look at the gem called construct. It does the same thing; maybe this class could be swapped for it
|
13
|
+
|
14
|
+
# Keeps configuration parameters (of an application, for instance) grouped
# within a single object that can be saved to and restored from a YAML file,
# which makes the configuration easy to edit.
#
# First create a MMETools::Config object:
#   config = MMETools::Config.new
# Configuration parameters can have any name and are set by assignment:
#   config.my_own_param = {:param => 'my own value'}
# Values can be of any type that YAML is able to serialize.
# Parameters are read with the same dot notation. Hash values are stored as
# nested Config objects, so dots can be chained to reach inner values:
#   config.my_own_param.param    # => 'my own value'
#   config.my_own_param.to_hash  # => {:param => 'my own value'}
# Keys are always stored as symbols (+to_sym+ is applied to every key).
class Config

  # Creates a Config object. If a Hash (or another Config) +data+ is given
  # it is used to populate the new object:
  #   cfg = MMETools::Config.new(
  #     :param1 => 1,
  #     :param2 => 2
  #   )
  # If a block is passed, the new object is yielded to it so additional data
  # can be set up:
  #   cfg = MMETools::Config.new do |c|
  #     c.param1 = 1
  #     c.param2 = 2
  #   end
  # Both idioms can be combined; the block is evaluated last, so it may
  # overwrite values coming from +data+.
  def initialize(data={}, &block)
    @data = {}
    update!(data)
    yield(self) if block_given?
  end

  # Merges +data+ (a Hash or another Config) into the kept configuration.
  # Values of already existing keys are replaced.
  # Raises ArgumentError for any other kind of +data+.
  def update!(data)
    # Hash#merge can't be used on @data because []= (below) converts keys to
    # symbols and wraps Hash values.
    source =
      case data
      when Hash then data
      when Config then data.to_hash
      else raise ArgumentError, "Only Hash objects or MMETools::Config objects admited"
      end
    source.each { |key, value| self[key] = value }
  end

  alias merge! update!

  # Returns a plain Hash with the configuration; nested Config objects are
  # converted to Hashes too.
  def to_hash
    @data.each_with_object({}) do |(key, value), hash|
      # Compare against Config (not self.class) because []= always wraps
      # nested hashes in Config, even when self is an instance of a subclass.
      hash[key] = value.kind_of?(Config) ? value.to_hash : value
    end
  end

  # Returns the value stored under +key+ (converted with +to_sym+), or nil.
  def [](key)
    @data[key.to_sym]
  end

  # Stores +value+ under +key+ (converted with +to_sym+). A Hash (or Hash
  # subclass, consistently with update!) is wrapped in a nested Config.
  def []=(key, value)
    @data[key.to_sym] = value.is_a?(Hash) ? Config.new(value) : value
  end

  # Loads a hash from the YAML file +filename+ and merges its contents.
  # An empty file leaves the configuration untouched.
  def load(filename)
    # Files written by #dump use Symbol keys, which the safe loader used by
    # YAML.load_file in Psych 4+ rejects; unsafe_load_file keeps the
    # behaviour YAML.load_file had when this class was written.
    # NOTE(review): this deserializes arbitrary YAML types; only load trusted
    # configuration files.
    loaded =
      if YAML.respond_to?(:unsafe_load_file)
        YAML.unsafe_load_file(filename)
      else
        YAML.load_file(filename)
      end
    update!(loaded || {}) # an empty document is loaded as false/nil
  end

  # Creates a Config object from a previously dumped one.
  # +filename+ is the name of the file containing the configuration.
  def self.load(filename)
    obj = self.new
    obj.load filename
    obj
  end

  # Saves the configuration into a YAML file named +filename+.
  def dump(filename)
    File.open(filename,'w') do |f|
      YAML.dump(self.to_hash,f)
    end
  end

  alias save dump

  private

  # Implements the dot notation: +name=+ stores its first argument under
  # +name+, any other unknown method name is read as a key (nil when unset).
  def method_missing(sym, *args)
    if sym.to_s =~ /(.+)=$/
      self[$1] = args.first
    else
      self[sym]
    end
  end

  # Keeps respond_to? in line with method_missing: any setter name and the
  # name of any key currently stored are reported as supported.
  def respond_to_missing?(sym, include_private = false)
    return true if sym.to_s =~ /.+=$/
    return true if @data && @data.key?(sym.to_sym)

    super
  end

end
|
120
|
+
|
121
|
+
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# mme_tools/debug
|
2
|
+
# Marcel Massana 1-Sep-2011
|
3
|
+
|
4
|
+
require 'pp'
|
5
|
+
require 'thread'
|
6
|
+
|
7
|
+
module MMETools
|
8
|
+
|
9
|
+
# tiny methods for debugging
|
10
|
+
module Debug
|
11
|
+
|
12
|
+
extend self
|
13
|
+
|
14
|
+
# Prints a debug message to standard output: first the requested stack
# frames, then each one of +vars+ pretty printed.
#
# +stck_lvls+:: number of caller stack levels to show, each printed as
#               "<frame>:". With zero or a negative number none is shown.
# +vars+::      list of values to be pretty printed with +pp+. It is
#               convenient to make the first one a String with an
#               informative message.
#
# All the output of one call is written while holding a mutex. Returns +vars+.
def print_debug(stck_lvls, *vars)
  @mutex ||= Mutex.new # instance mutex created the first time it is called
  # +caller+ has to be evaluated in this method so that frame 0 is the place
  # print_debug was called from. The empty list avoids calling #each on nil
  # when no stack level is requested.
  referers = stck_lvls > 0 ? caller[0...stck_lvls] : []
  @mutex.synchronize do
    referers.each { |r| puts "#{r}:" }
    vars.each { |v| pp v }
  end
end
|
26
|
+
|
27
|
+
end
|
28
|
+
|
29
|
+
end
|
@@ -0,0 +1,88 @@
|
|
1
|
+
# mme_tools/enumerable
|
2
|
+
# Marcel Massana 1-Sep-2011
|
3
|
+
|
4
|
+
module MMETools
|
5
|
+
|
6
|
+
# Methods for Enumerables (Arrays and other each-enabled stuff)
|
7
|
+
module Enumerable
|
8
|
+
|
9
|
+
extend self
|
10
|
+
|
11
|
+
# Returns an array where each element is a tuple (Array) made of one
# element of each one of +enumerables+, taken position by position. The
# result has as many tuples as the longest enumerable; shorter ones are
# padded with nil. If a block is given, each tuple is passed to it and the
# result of the block is stored in the returned array instead of the tuple.
#
# The arguments are read through +to_a+ and are not modified. With no
# arguments an empty array is returned.
def compose(*enumerables)
  lists = enumerables.map(&:to_a)
  longest = lists.map(&:size).max || 0 # max is nil when no list is given
  (0...longest).map do |position|
    tuple = lists.map { |list| list[position] }
    block_given? ? yield(tuple) : tuple
  end
end
|
26
|
+
|
27
|
+
# Iterator that classifies an enumerable
# (The Ruby Way, 2nd ed. - p 289).
#
# Each element of +enumrbl+ is passed to the block; the returned Hash maps
# every distinct block result to the Array of elements that produced it, in
# their original order.
#
# Raises ArgumentError when no block is given.
def classify(enumrbl, &block)
  raise ArgumentError, "classify requires a block" unless block

  enumrbl.group_by(&block)
end
|
38
|
+
|
39
|
+
# FIXME I don't know really why I designed this ... possibly drop candidate
# Returns an array with the sequence that goes from +first+ to +last+.
# The block is not optional: it is passed an element and must return the
# next one. +first+ is always the first element of the result and is not
# compared with +last+; generation stops at the first generated element
# that matches +last+.
#
# +options+ is a Hash that can contain:
# +:comp=>eq_method+:: symbol with the name of the method that, sent to an
#                      element with +last+ as parameter, evaluates equality.
#                      If not supplied +:==+ is assumed.
# +:max=>max_num+::    maximum size of the returned array. If not supplied
#                      false is assumed (no limit).
# +:last_included?=>true or false+:: tells whether the element matching
#                      +last+ is included. If not supplied true is assumed.
#
# Without +:max+ the loop only ends when the block produces +last+.
def from_to(first, last, options=nil)
  opts = options.is_a?(Hash) ? options : {}
  maxcount = opts.fetch(:max, false)
  lastincluded = opts.fetch(:last_included?, true)
  comp = opts.fetch(:comp, :==)

  return [] if maxcount && maxcount < 1

  ret = [first]
  current = first
  loop do
    # stop before generating more elements than allowed
    break if maxcount && ret.size >= maxcount

    current = yield(current)
    if current.send(comp, last)
      ret << current if lastincluded
      break
    end
    ret << current
  end
  ret
end
|
71
|
+
|
72
|
+
# Returns an array with the elements of +array+ found at odd zero-based
# indexes (1, 3, 5, ...), that is, the 2nd, 4th, 6th... elements.
# Any object that provides each_with_index is accepted, not only Arrays.
# more at http://stackoverflow.com/questions/1614147/odd-or-even-entries-in-a-ruby-array
def odd_values(array)
  array.each_with_index.select { |_item, idx| idx.odd? }.map(&:first)
end
|
78
|
+
|
79
|
+
# Returns an array with the elements of +array+ found at even zero-based
# indexes (0, 2, 4, ...), that is, the 1st, 3rd, 5th... elements.
# Any object that provides each_with_index is accepted, not only Arrays.
# more at http://stackoverflow.com/questions/1614147/odd-or-even-entries-in-a-ruby-array
def even_values(array)
  array.each_with_index.select { |_item, idx| idx.even? }.map(&:first)
end
|
85
|
+
|
86
|
+
end
|
87
|
+
|
88
|
+
end
|
@@ -0,0 +1,109 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
# mme_tools/webparse
|
3
|
+
# Marcel Massana 1-Sep-2011
|
4
|
+
|
5
|
+
require 'rubygems'
|
6
|
+
require 'iconv'
|
7
|
+
require 'unicode'
|
8
|
+
require 'jcode' if RUBY_VERSION < '1.9'
|
9
|
+
require 'date'
|
10
|
+
|
11
|
+
# $KCODE = "u"
|
12
|
+
|
13
|
+
module MMETools
|
14
|
+
|
15
|
+
# methods for processing strings while parsing webpages
|
16
|
+
module Webparse
|
17
|
+
|
18
|
+
extend self # elegant truc per a que tots els metodes siguin class/module methods
|
19
|
+
|
20
|
+
# Returns +uri+ without the javascript invocation wrapped around it, if
# there is one. For instance
# "javascript:openDoc('/gisa/documentos/cartes/PT.DOC')" -> "/gisa/documentos/cartes/PT.DOC"
# Anything that does not contain a Doc('...') call is returned unchanged.
def clear_uri(uri)
  return uri unless /Doc\('.*'\)/ === uri

  # the text between the quotes of Doc('...')
  uri.match(/Doc\('(.*)'\)/).captures.first
end
|
28
|
+
|
29
|
+
# Removes the unnecessary blanks of a string: every run of whitespace
# and/or no-break spaces (the UTF-8 bytes "\302\240", HTML's &nbsp;) is
# replaced by a single space and the result is stripped at both ends.
# The options +opts+ can be:
# +:encoding+ => "ASCII" | "UTF8" (default)
#   "ASCII" replaces each non-ASCII character with its entry in
#           translation_hash; characters without an entry are dropped.
#   "UTF8"  leaves the characters as they are.
# (based on an idea of Obie Fernandez http://www.jroller.com/obie/tags/unicode)
def clear_string(str, opts={})
  settings = {:encoding=>'UTF8'}.merge(opts) # default option :encoding=>'UTF8'
  if settings[:encoding]=='ASCII'
    # a first byte above 127 marks a non-ASCII character
    str = str.chars.map { |ch| ch.bytes[0] > 127 ? translation_hash[ch] : ch }.join
  end
  str.gsub(/[\s\302\240]+/mu," ").strip
end
|
42
|
+
## The original version, based on Iconv, does not work in some circumstances
|
43
|
+
# def orig_clear_string(str, options={:encoding=>'UTF8'})
|
44
|
+
# str=Iconv.conv('ASCII//TRANSLIT//IGNORE', 'UTF8', str) if options[:encoding]=='ASCII'
|
45
|
+
# str.gsub(/[\s\302\240]+/mu," ").strip # el caracter UTF8 "\302\240" correspon al de HTML
|
46
|
+
# end
|
47
|
+
|
48
|
+
# Tries to convert +str+ to plain ASCII by handing it to Iconv as UTF8 text
# with the 'ASCII//TRANSLIT//IGNORE' target.
# NOTE(review): presumably unconvertible characters are transliterated or
# dropped, as the target flags suggest - confirm against the Iconv in use.
def asciify(str)
  target_charset = 'ASCII//TRANSLIT//IGNORE'
  source_charset = 'UTF8'
  Iconv.conv(target_charset, source_charset, str)
end
|
52
|
+
|
53
|
+
# Transforms a string +str+ into an acronym: the upcased initials of its
# words. +str+ is first cleaned with clear_string (ASCII mode) and every
# non-word character is turned into a space.
def acronymize(str)
  words_only = clear_string(str, :encoding => 'ASCII').gsub(/\W/," ")
  initials = lambda { |text| text.split(" ").map { |word| word[0..0].upcase }.join }

  # option 1: ignore one-letter words and the unwanted words "de" and "en"
  stop_words = %w[de en].map {|w| "\\b#{w}\\b"}.join("|")
  acronym = initials.call(words_only.gsub(/\b\w\b|#{stop_words}/i," "))

  # option 2: when nothing is left, use the initial of every word
  acronym == "" ? initials.call(words_only) : acronym
end
|
68
|
+
|
69
|
+
# Transforms +str+ into a shortened version: the string is cleaned with
# clear_string (ASCII mode), every non-word character becomes a word
# separator, and the first two letters of each word, capitalized, are
# joined together.
def shorten(str)
  clear_string(str, :encoding => 'ASCII')
    .gsub(/\W/," ")
    .split(" ")
    .map { |word| word[0..1].capitalize }
    .join
end
|
76
|
+
|
77
|
+
# Extracts and returns the first DateTime that can be found in +str+, or
# nil when +str+ contains no date.
#
# The date is expected as day, month and year separated by "/" or "-",
# optionally followed by non-word characters and a time as hour:minute
# (0:00 is used when there is no time). Two-digit years are expanded:
# 0..69 become 2000..2069 and 70..99 become 1970..1999.
#
# DateTime.civil raises ArgumentError when the extracted numbers do not
# form a valid date.
def datify(str)
  pttrn = /(\d+)[\/-](\d+)[\/-](\d+)(\W+(\d+)\:(\d+))?/
  found = pttrn.match(str)
  return nil unless found

  # the 4th capture is the whole optional time part and is not needed
  day, month, year, _, hour, min = found.captures.map {|d| d ? d.to_i : 0 }
  case year
  when 0..69
    year += 2000
  when 70..99
    year += 1900
  end
  DateTime.civil year, month, day, hour, min
end
|
89
|
+
|
90
|
+
protected
|
91
|
+
|
92
|
+
def translation_hash
|
93
|
+
@@translation_hash ||= setup_translation_hash
|
94
|
+
end
|
95
|
+
|
96
|
+
def setup_translation_hash
|
97
|
+
accented_chars = "ÀÁÂÃÄÅÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝàáâãäåçèéêëìíîïñòóôõöøùúûüý".chars.map{|c| c}
|
98
|
+
unaccented_chars = "AAAAAACEEEEIIIIDNOOOOOxOUUUUYaaaaaaceeeeiiiinoooooouuuuy".split('')
|
99
|
+
|
100
|
+
translation_hash = {}
|
101
|
+
accented_chars.each_with_index { |char, idx| translation_hash[char] = unaccented_chars[idx] }
|
102
|
+
translation_hash["Æ"] = 'AE'
|
103
|
+
translation_hash["æ"] = 'ae'
|
104
|
+
translation_hash
|
105
|
+
end
|
106
|
+
|
107
|
+
end
|
108
|
+
|
109
|
+
end
|