words-counter 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/words_counter.rb +57 -0
- metadata +43 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA1:
|
|
3
|
+
metadata.gz: 0ae11e255506c68483eba2d508dc15de6180d478
|
|
4
|
+
data.tar.gz: 5909ee55bef283a08a14d07d73c1f547a59fdde7
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 7e3b61dbab34311fa6819f35fb274122421621eb8b6349f1c43f8c30cdcacdc282abb2d42dff1a6ea845af2e61ee9c35b0d757251e06fbe06bb505f2ad8a8bd3
|
|
7
|
+
data.tar.gz: 27d73795e2a2225cfbbaa4214952403144003268e6ced0dd68ce274aa008e1d3552c85ab8a63ac39f7c52e947f0835963d981da6b32b3d2fb33b9848d8afadcc
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
require 'nokogiri'
|
|
2
|
+
require 'open-uri'
|
|
3
|
+
|
|
4
|
+
# Counts how often each word occurs in the paragraphs and top-level headings
# of a web page.
module WordsCounter
  # Common English words that are left out of the result. They are stored
  # capitalized because every word is capitalized before it is checked
  # against this list.
  STOP_WORDS = %w[
    A An And Are As Be For Have I In Is It Its Of On Or Our That The These
    This To We Will With You Your
  ].freeze

  # Downloads +url+ and tallies the words found in its <p> and <h1> elements.
  #
  # Words are runs of ASCII letters, compared case-insensitively and reported
  # in capitalized form ("cat" and "CAT" are both counted as "Cat").
  #
  # @param url [#to_s] address of the page; surrounding whitespace is removed
  #   and unsafe characters are percent-escaped before the URL is parsed
  # @return [Array<Array(String, Integer)>] [word, count] pairs ordered from
  #   most to least frequent; words with equal counts are ordered
  #   alphabetically
  # @raise [ArgumentError] if the URL's scheme cannot be opened by open-uri
  def self.analyse(url)
    # URI.encode no longer exists in Ruby 3.0+; the RFC 2396 parser provides
    # the same escaping.
    escaped = URI::RFC2396_Parser.new.escape(url.to_s.strip)
    uri = URI.parse(escaped)

    # open-uri adds a public #open only to the URI classes it can fetch.
    # Calling it on the URI itself (rather than Kernel#open, which stopped
    # accepting URIs in Ruby 3.0) also guarantees nothing but a remote
    # resource is ever opened.
    raise ArgumentError, "cannot open URL: #{uri}" unless uri.respond_to?(:open)

    # The block form closes the connection once the document is parsed.
    page = uri.open { |io| Nokogiri::HTML(io) }

    # Join the nodes with a space: taking the text of the whole node set
    # would glue the last word of one element to the first word of the next.
    text = page.css('p, h1').map(&:text).join(' ')
    count_words(text)
  end

  # Tallies the non-stop-words in +text+ and orders them by frequency.
  def self.count_words(text)
    tally = Hash.new(0)
    text.scan(/[a-zA-Z]+/) do |raw|
      word = raw.capitalize
      tally[word] += 1 unless STOP_WORDS.include?(word)
    end
    # Highest count first; the word itself breaks ties so the order is stable.
    tally.sort_by { |word, count| [-count, word] }
  end
  private_class_method :count_words
end
|
metadata
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: words-counter
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Fiachra Murray
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: bin
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2016-04-15 00:00:00.000000000 Z
|
|
12
|
+
dependencies: []
|
|
13
|
+
description: returns the words on the page and how often each one occurs.
|
|
14
|
+
email: fiachra.murray.2014@gmail.com
|
|
15
|
+
executables: []
|
|
16
|
+
extensions: []
|
|
17
|
+
extra_rdoc_files: []
|
|
18
|
+
files:
|
|
19
|
+
- lib/words_counter.rb
|
|
20
|
+
homepage: http://rubygems.org/gems/words-counter
|
|
21
|
+
licenses: []
|
|
22
|
+
metadata: {}
|
|
23
|
+
post_install_message:
|
|
24
|
+
rdoc_options: []
|
|
25
|
+
require_paths:
|
|
26
|
+
- lib
|
|
27
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
28
|
+
requirements:
|
|
29
|
+
- - ">="
|
|
30
|
+
- !ruby/object:Gem::Version
|
|
31
|
+
version: '0'
|
|
32
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
33
|
+
requirements:
|
|
34
|
+
- - ">="
|
|
35
|
+
- !ruby/object:Gem::Version
|
|
36
|
+
version: '0'
|
|
37
|
+
requirements: []
|
|
38
|
+
rubyforge_project:
|
|
39
|
+
rubygems_version: 2.5.1
|
|
40
|
+
signing_key:
|
|
41
|
+
specification_version: 4
|
|
42
|
+
summary: counts the number of times a word appears on a webpage.
|
|
43
|
+
test_files: []
|