words-counter 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/lib/words_counter.rb +57 -0
  3. metadata +43 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 0ae11e255506c68483eba2d508dc15de6180d478
4
+ data.tar.gz: 5909ee55bef283a08a14d07d73c1f547a59fdde7
5
+ SHA512:
6
+ metadata.gz: 7e3b61dbab34311fa6819f35fb274122421621eb8b6349f1c43f8c30cdcacdc282abb2d42dff1a6ea845af2e61ee9c35b0d757251e06fbe06bb505f2ad8a8bd3
7
+ data.tar.gz: 27d73795e2a2225cfbbaa4214952403144003268e6ced0dd68ce274aa008e1d3552c85ab8a63ac39f7c52e947f0835963d981da6b32b3d2fb33b9848d8afadcc
@@ -0,0 +1,57 @@
1
+ require 'nokogiri'
2
+ require 'open-uri'
3
+
4
# Counts how often each word occurs in the paragraphs and top-level headings
# of a web page.
module WordsCounter
  # Capitalised forms of common English words that are left out of the count.
  # Words are capitalised before filtering, so the list is stored that way.
  STOP_WORDS = %w[
    A An And Are As Be For Have I In Is It Its Of On Or Our That The These
    This To We Will With You Your
  ].freeze

  # Fetches the page at +url+ and counts the words found in its <p> and <h1>
  # elements.
  #
  # @param url [#to_s] address of the page; surrounding whitespace is stripped
  #   and unsafe characters (e.g. spaces) are percent-encoded before parsing
  # @return [Array<Array(String, Integer)>] [word, count] pairs with words
  #   capitalised, ordered by descending count; equal counts are ordered
  #   alphabetically
  # @raise [ArgumentError] if the URL's scheme cannot be opened by open-uri
  #   (for example, a URL with no scheme at all)
  def self.analyse(url)
    # URI.encode was removed in Ruby 3.0; the RFC 2396 parser provides the
    # same escaping on both old and new Rubies.
    escaped = URI::RFC2396_Parser.new.escape(url.to_s.strip)
    uri = URI.parse(escaped)
    unless uri.respond_to?(:open)
      raise ArgumentError, "cannot fetch #{uri}: unsupported or missing URL scheme"
    end

    # Block form closes the downloaded stream once the document is parsed.
    page = uri.open { |io| Nokogiri::HTML(io) }

    # Join the text of each node with a space. Calling #text on the whole
    # node set concatenates nodes with no separator, which fuses the last
    # word of one paragraph with the first word of the next.
    text = (page.css('p').map(&:text) + page.css('h1').map(&:text)).join(' ')

    count_words(text)
  end

  # Splits +text+ on runs of non-letters, drops stop words and tallies the
  # remaining capitalised words, most frequent first.
  def self.count_words(text)
    words = text.split(/[^a-zA-Z]+/).reject(&:empty?).map(&:capitalize)
    counts = (words - STOP_WORDS).each_with_object(Hash.new(0)) do |word, tally|
      tally[word] += 1
    end
    # Secondary sort key makes the order of equally frequent words stable.
    counts.sort_by { |word, count| [-count, word] }
  end
  private_class_method :count_words
end
metadata ADDED
@@ -0,0 +1,43 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: words-counter
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Fiachra Murray
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-04-15 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: returns the words found on a web page paired with how often each one occurs.
14
+ email: fiachra.murray.2014@gmail.com
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - lib/words_counter.rb
20
+ homepage: http://rubygems.org/gems/words-counter
21
+ licenses: []
22
+ metadata: {}
23
+ post_install_message:
24
+ rdoc_options: []
25
+ require_paths:
26
+ - lib
27
+ required_ruby_version: !ruby/object:Gem::Requirement
28
+ requirements:
29
+ - - ">="
30
+ - !ruby/object:Gem::Version
31
+ version: '0'
32
+ required_rubygems_version: !ruby/object:Gem::Requirement
33
+ requirements:
34
+ - - ">="
35
+ - !ruby/object:Gem::Version
36
+ version: '0'
37
+ requirements: []
38
+ rubyforge_project:
39
+ rubygems_version: 2.5.1
40
+ signing_key:
41
+ specification_version: 4
42
+ summary: counts the number of times a word appears on a webpage.
43
+ test_files: []