xws 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (6) hide show
  1. checksums.yaml +7 -0
  2. checksums.yaml.gz.sig +0 -0
  3. data.tar.gz.sig +0 -0
  4. data/lib/xws.rb +43 -0
  5. metadata +88 -0
  6. metadata.gz.sig +1 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 5810f7acaa63084378d114ea44d5707e0e7f4f9b
4
+ data.tar.gz: 99a35051dc284771d557c6c59b30dd3ae97809c0
5
+ SHA512:
6
+ metadata.gz: edffc30afb812e82fbbb11a641c2531081e432d3c1ebad20886b3ee5570a61b55c63746f7ee62d8593efdeca4ef36e94186277b5b2e9f43c1066a66beffec25c
7
+ data.tar.gz: 215a721bcdf607e48953a5173997aba18ef61562b74eac69d6b72157be8ad311579b225b678132f051d19870de645c85da72198347f2fe81330eeb24f7d88881
checksums.yaml.gz.sig ADDED
Binary file
data.tar.gz.sig ADDED
Binary file
data/lib/xws.rb ADDED
@@ -0,0 +1,43 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # file: xws.rb
4
+
5
+ require 'rexle'
6
+
7
+
8
# XML Word Scanner: walks a document node and counts how often each word
# occurs in its text, skipping configurable elements and a small list of
# ignored words.
class XWS

  # @param ignore_elements [Array<Symbol, String>] names of elements whose
  #   text must not be counted. Names are compared for exact equality.
  def initialize(ignore_elements: %i(pre code time))

    # Element names are compared as strings, so normalise symbols once here.
    @ignore_elements = ignore_elements.map(&:to_s)
    @ignorewords = %i(the and or)
  end

  # Collects the text of every element yielded by +node.each_recursive+
  # (except ignored elements) and counts the words found.
  #
  # @param node [#each_recursive] an object that yields elements responding
  #   to +name+ and +texts+ (an Array of Strings).
  # @return [Hash{String => Integer}] each word mapped to its number of
  #   occurrences, in order of first appearance.
  def scan(node)

    fragments = []

    node.each_recursive do |element|
      # Exact name match: 'timer' or 'prefix' must not be skipped just
      # because they contain 'time' or 'pre'.
      next if @ignore_elements.include?(element.name.to_s)

      fragments.concat(element.texts.map(&:strip).reject(&:empty?))
    end

    # Single-pass count into a plain Hash (missing keys still return nil).
    words(fragments.join(' ')).split.each_with_object({}) do |word, counts|
      counts[word] = counts.fetch(word, 0) + 1
    end

  end

  # Normalises raw text into a space-separated string of countable words.
  #
  # @param s [String] the raw text
  # @return [String] lower-cased text with punctuation and ignored words
  #   removed; callers are expected to +split+ the result.
  def words(s)

    # remove words containing an apostrophe
    text = s.downcase.gsub(/\w+'\w+/, '')

    # remove quotation marks
    text = text.gsub(/["']/, '')

    # remove non-alphabetical characters from the start or end of words
    text = text.gsub(/(\w)[^a-z ]+\B|\B[^a-z #]+(\w)/, '\1\2')

    # remove any single character that stands alone between whitespace
    text = text.gsub(/\s.\s/, ' ')

    # Remove ignored words. The alternation is grouped so that the word
    # boundaries apply to every alternative; without the group only the
    # first and last alternatives would be anchored.
    ignored = Regexp.union(@ignorewords.map(&:to_s))
    text.gsub(/\b(?:#{ignored.source})\b/, '')

  end

end
metadata ADDED
@@ -0,0 +1,88 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: xws
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - James Robertson
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain:
11
+ - |
12
+ -----BEGIN CERTIFICATE-----
13
+ MIIDljCCAn6gAwIBAgIBATANBgkqhkiG9w0BAQUFADBIMRIwEAYDVQQDDAlnZW1t
14
+ YXN0ZXIxHjAcBgoJkiaJk/IsZAEZFg5qYW1lc3JvYmVydHNvbjESMBAGCgmSJomT
15
+ 8ixkARkWAmV1MB4XDTE1MTExMjEzNDIwOVoXDTE2MTExMTEzNDIwOVowSDESMBAG
16
+ A1UEAwwJZ2VtbWFzdGVyMR4wHAYKCZImiZPyLGQBGRYOamFtZXNyb2JlcnRzb24x
17
+ EjAQBgoJkiaJk/IsZAEZFgJldTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoC
18
+ ggEBAMTc5Y2iGpVpgA0bMCMkawvWl45yZmC093FTo0ouHVxXqD6Spl6qd69TeJ47
19
+ b1luuUHWbWSD4kPgRrXCUSl3cBfSWHnrr7BBFo/s4KHLAeZeVu5oW4vyMe7LoeSq
20
+ 7m4OyHkXx1++i9p1StO3Xq4ATlJyDK88c4h0qwqLDtYbW7uo+MGDyTXu87JEE0PE
21
+ gE1zD6WNfgm/n5pbI8inijTD1Z8TUj+XTwDK88/q9qfUaZXVLFR6ZABwxU22wbNB
22
+ zvmIeQQTjHjf5WPoqEILIN64grKBKE2SkJfG0KcEEfK8MFVhvmzZRHJ06dDHaMjx
23
+ Gl6P8QmwkxCE21zlWv95M8W85tkCAwEAAaOBijCBhzAJBgNVHRMEAjAAMAsGA1Ud
24
+ DwQEAwIEsDAdBgNVHQ4EFgQUVXguJCp/zrEkJCkHCtFDEMWqv2owJgYDVR0RBB8w
25
+ HYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0c29uLmV1MCYGA1UdEgQfMB2BG2dlbW1h
26
+ c3RlckBqYW1lc3JvYmVydHNvbi5ldTANBgkqhkiG9w0BAQUFAAOCAQEAiEmEjzhh
27
+ zKdOtVNj+UPC4BgiAcx6Y5RVFlRhu4YHmbTNy69ufi9C96g+rY3i+x+Wai99mgPM
28
+ 0h9/iddDRokLOR4W26uZU187+MAxXHsMckIp5dMdSfLuqTSJe3yUt5iPutwSKHQu
29
+ pmWTTFlIxLlkcaAEdNmHdTW20qyxunhH5ejArJk8r33/odbQFfWPl6nUQbTqVAC7
30
+ dfHZ+bJE6nkLJVhFdKH+VCvr7bE7aa05//GN12RwurUpuSJ/ZA4EPL8InaykCUHh
31
+ gBElgRX/rNJ5r9tumb1VJ+TZMg8fu7CmCoC4FuJtnA7ajJHeC0JKSpCDYtgiOs4W
32
+ ET+JXG7GYubSoQ==
33
+ -----END CERTIFICATE-----
34
+ date: 2015-11-12 00:00:00.000000000 Z
35
+ dependencies:
36
+ - !ruby/object:Gem::Dependency
37
+ name: rexle
38
+ requirement: !ruby/object:Gem::Requirement
39
+ requirements:
40
+ - - "~>"
41
+ - !ruby/object:Gem::Version
42
+ version: '1.3'
43
+ - - ">="
44
+ - !ruby/object:Gem::Version
45
+ version: 1.3.9
46
+ type: :runtime
47
+ prerelease: false
48
+ version_requirements: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - "~>"
51
+ - !ruby/object:Gem::Version
52
+ version: '1.3'
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: 1.3.9
56
+ description:
57
+ email: james@r0bertson.co.uk
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - lib/xws.rb
63
+ homepage: https://github.com/jrobertson/xws
64
+ licenses:
65
+ - MIT
66
+ metadata: {}
67
+ post_install_message:
68
+ rdoc_options: []
69
+ require_paths:
70
+ - lib
71
+ required_ruby_version: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ required_rubygems_version: !ruby/object:Gem::Requirement
77
+ requirements:
78
+ - - ">="
79
+ - !ruby/object:Gem::Version
80
+ version: '0'
81
+ requirements: []
82
+ rubyforge_project:
83
+ rubygems_version: 2.4.8
84
+ signing_key:
85
+ specification_version: 4
86
+ summary: XML Word Scanner (XWS) reads a Rexle document and returns a hash object containing
87
+ each word found.
88
+ test_files: []
metadata.gz.sig ADDED
@@ -0,0 +1 @@
1
+ 7@�9���}�Vꯐ]x�o��rBI�j3p�2-�t~`e<m�Yh*� 6g�����a\�&:ġa�g�Î)6���]�]nG���1�&�0� %��t���������#,U���Q0�H�.��ōmϼ72ˋǨÔ,����]�#��wm![