xws 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- checksums.yaml.gz.sig +0 -0
- data.tar.gz.sig +0 -0
- data/lib/xws.rb +43 -0
- metadata +88 -0
- metadata.gz.sig +1 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 5810f7acaa63084378d114ea44d5707e0e7f4f9b
|
4
|
+
data.tar.gz: 99a35051dc284771d557c6c59b30dd3ae97809c0
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: edffc30afb812e82fbbb11a641c2531081e432d3c1ebad20886b3ee5570a61b55c63746f7ee62d8593efdeca4ef36e94186277b5b2e9f43c1066a66beffec25c
|
7
|
+
data.tar.gz: 215a721bcdf607e48953a5173997aba18ef61562b74eac69d6b72157be8ad311579b225b678132f051d19870de645c85da72198347f2fe81330eeb24f7d88881
|
checksums.yaml.gz.sig
ADDED
Binary file
|
data.tar.gz.sig
ADDED
Binary file
|
data/lib/xws.rb
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# file: xws.rb
|
4
|
+
|
5
|
+
require 'rexle'
|
6
|
+
|
7
|
+
|
8
|
+
# XML Word Scanner: walks a document tree and counts the words found in
# the text of its elements.
#
# The tree is only required to respond to +each_recursive+, yielding objects
# that respond to +name+ and +texts+ (presumably Rexle elements, given the
# file's `require 'rexle'` -- confirm against callers).
class XWS

  # @param ignore_elements [Array<Symbol, String>] names of elements whose
  #   own text is skipped; names are compared exactly, not as patterns.
  def initialize(ignore_elements: %i(pre code time))

    # Normalise to strings once so scan() can do a plain exact-name lookup.
    @ignore_elements = Array(ignore_elements).map(&:to_s)
    @ignorewords = %i(the and or)

  end

  # Collects the text of every element yielded by +node.each_recursive+
  # (except ignored elements) and counts each remaining word.
  #
  # Only the ignored element's own texts are skipped; its descendants are
  # still visited by +each_recursive+ and judged by their own names.
  #
  # @param node [#each_recursive] root of the tree to scan
  # @return [Hash{String => Integer}] word => number of occurrences, in
  #   order of first appearance
  def scan(node)

    fragments = []

    node.each_recursive do |element|
      # Exact comparison: an unanchored regexp would also skip elements such
      # as "timeline", and an empty ignore list would skip every element.
      next if @ignore_elements.include?(element.name.to_s)

      fragments.concat(element.texts.map(&:strip).reject(&:empty?))
    end

    # Single pass count; avoids building a new hash per distinct word.
    words(fragments.join(' ')).split.each_with_object({}) do |word, counts|
      counts[word] = counts.fetch(word, 0) + 1
    end

  end

  # Normalises raw text into a space separated string of lower-case words.
  #
  # @param s [String] the raw text
  # @return [String] the cleaned text (may contain runs of spaces where
  #   words were removed)
  def words(s)

    s.downcase.
      gsub(/\w+'\w+/,'').   # remove words containing an apostrophe
      gsub(/["']/,'').      # remove quotation marks
      gsub(/(\w)[^a-z ]+\B|\B[^a-z #]+(\w)/,'\1\2'). # remove non-alphabetical characters from the start or end of words
      gsub(/\s.\s/,' ').    # remove single-character tokens (digits, "a", "i", ...)
      gsub(ignorewords_pattern,'') # remove whole ignored words only

  end

  private

  # Builds the regexp matching any ignored word as a whole word. The
  # alternation must be grouped: without the group, \b would bind only to
  # the first and last alternatives and fragments inside other words
  # ("sandy", "for", "them") would be removed as well.
  def ignorewords_pattern
    /\b(?:#{@ignorewords.join('|')})\b/
  end

end
|
metadata
ADDED
@@ -0,0 +1,88 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: xws
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- James Robertson
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain:
|
11
|
+
- |
|
12
|
+
-----BEGIN CERTIFICATE-----
|
13
|
+
MIIDljCCAn6gAwIBAgIBATANBgkqhkiG9w0BAQUFADBIMRIwEAYDVQQDDAlnZW1t
|
14
|
+
YXN0ZXIxHjAcBgoJkiaJk/IsZAEZFg5qYW1lc3JvYmVydHNvbjESMBAGCgmSJomT
|
15
|
+
8ixkARkWAmV1MB4XDTE1MTExMjEzNDIwOVoXDTE2MTExMTEzNDIwOVowSDESMBAG
|
16
|
+
A1UEAwwJZ2VtbWFzdGVyMR4wHAYKCZImiZPyLGQBGRYOamFtZXNyb2JlcnRzb24x
|
17
|
+
EjAQBgoJkiaJk/IsZAEZFgJldTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoC
|
18
|
+
ggEBAMTc5Y2iGpVpgA0bMCMkawvWl45yZmC093FTo0ouHVxXqD6Spl6qd69TeJ47
|
19
|
+
b1luuUHWbWSD4kPgRrXCUSl3cBfSWHnrr7BBFo/s4KHLAeZeVu5oW4vyMe7LoeSq
|
20
|
+
7m4OyHkXx1++i9p1StO3Xq4ATlJyDK88c4h0qwqLDtYbW7uo+MGDyTXu87JEE0PE
|
21
|
+
gE1zD6WNfgm/n5pbI8inijTD1Z8TUj+XTwDK88/q9qfUaZXVLFR6ZABwxU22wbNB
|
22
|
+
zvmIeQQTjHjf5WPoqEILIN64grKBKE2SkJfG0KcEEfK8MFVhvmzZRHJ06dDHaMjx
|
23
|
+
Gl6P8QmwkxCE21zlWv95M8W85tkCAwEAAaOBijCBhzAJBgNVHRMEAjAAMAsGA1Ud
|
24
|
+
DwQEAwIEsDAdBgNVHQ4EFgQUVXguJCp/zrEkJCkHCtFDEMWqv2owJgYDVR0RBB8w
|
25
|
+
HYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0c29uLmV1MCYGA1UdEgQfMB2BG2dlbW1h
|
26
|
+
c3RlckBqYW1lc3JvYmVydHNvbi5ldTANBgkqhkiG9w0BAQUFAAOCAQEAiEmEjzhh
|
27
|
+
zKdOtVNj+UPC4BgiAcx6Y5RVFlRhu4YHmbTNy69ufi9C96g+rY3i+x+Wai99mgPM
|
28
|
+
0h9/iddDRokLOR4W26uZU187+MAxXHsMckIp5dMdSfLuqTSJe3yUt5iPutwSKHQu
|
29
|
+
pmWTTFlIxLlkcaAEdNmHdTW20qyxunhH5ejArJk8r33/odbQFfWPl6nUQbTqVAC7
|
30
|
+
dfHZ+bJE6nkLJVhFdKH+VCvr7bE7aa05//GN12RwurUpuSJ/ZA4EPL8InaykCUHh
|
31
|
+
gBElgRX/rNJ5r9tumb1VJ+TZMg8fu7CmCoC4FuJtnA7ajJHeC0JKSpCDYtgiOs4W
|
32
|
+
ET+JXG7GYubSoQ==
|
33
|
+
-----END CERTIFICATE-----
|
34
|
+
date: 2015-11-12 00:00:00.000000000 Z
|
35
|
+
dependencies:
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: rexle
|
38
|
+
requirement: !ruby/object:Gem::Requirement
|
39
|
+
requirements:
|
40
|
+
- - "~>"
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
version: '1.3'
|
43
|
+
- - ">="
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: 1.3.9
|
46
|
+
type: :runtime
|
47
|
+
prerelease: false
|
48
|
+
version_requirements: !ruby/object:Gem::Requirement
|
49
|
+
requirements:
|
50
|
+
- - "~>"
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: '1.3'
|
53
|
+
- - ">="
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: 1.3.9
|
56
|
+
description:
|
57
|
+
email: james@r0bertson.co.uk
|
58
|
+
executables: []
|
59
|
+
extensions: []
|
60
|
+
extra_rdoc_files: []
|
61
|
+
files:
|
62
|
+
- lib/xws.rb
|
63
|
+
homepage: https://github.com/jrobertson/xws
|
64
|
+
licenses:
|
65
|
+
- MIT
|
66
|
+
metadata: {}
|
67
|
+
post_install_message:
|
68
|
+
rdoc_options: []
|
69
|
+
require_paths:
|
70
|
+
- lib
|
71
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
77
|
+
requirements:
|
78
|
+
- - ">="
|
79
|
+
- !ruby/object:Gem::Version
|
80
|
+
version: '0'
|
81
|
+
requirements: []
|
82
|
+
rubyforge_project:
|
83
|
+
rubygems_version: 2.4.8
|
84
|
+
signing_key:
|
85
|
+
specification_version: 4
|
86
|
+
summary: XML Word Scanner (XWS) reads a Rexle document and returns a hash object containing
|
87
|
+
each word found.
|
88
|
+
test_files: []
|
metadata.gz.sig
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
7@�9���}�Vꯐ]x�o��rBI�j3p�2-�t~`e<m�Yh*�6g�����a\�&:ġa�g�Î)6���]�]nG���1�&�0�%��t���������#,U���Q0�H�.��ōmϼ72ˋǨÔ,����]�#��wm![
|