wiki_top_page_views 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/top_page +35 -0
- data/lib/wiki_top_page_views.rb +45 -0
- metadata +89 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 43816808994eb7d2a80df7f9fc82d2627bb52123
|
4
|
+
data.tar.gz: 6332c7daed022a743bd9d20108c67bee69336a57
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 5a8ef1914baa45270775d298d1eb2b83fc14458064951bfdbf5666ee51d644aaaa7805c8b87fe708d3f425d6e235597517ce6a9ae8801c1572b49352ec44e9fb
|
7
|
+
data.tar.gz: 6550324ac6e9b30a1bddefcc9fa6e48abfee19e6a056e22bc6f079199912fce3b71c222a82330ba92d5126d0357cbd261967ec2d23340a7edb2cb7274c8418fd
|
data/bin/top_page
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Command-line entry point: parses the options, builds a Find for the given
# dump file and prints the array returned by Find#top_topics.
require_relative "../lib/wiki_top_page_views"
require 'optparse'

options = {}

opt_parser = OptionParser.new do |opt|
  opt.on("-f", "--file FILE", "the file name") do |file|
    options[:file] = file
  end

  opt.on("-l", "--language LANGUAGE", "the two letter abbreviation for the language") do |lang|
    options[:lang] = lang
  end

  # Declaring the argument as Integer makes OptionParser convert and validate
  # it; without the type the count would arrive as a String.
  opt.on("-c", "--count COUNT", Integer, "the count of top subjects") do |count|
    options[:count] = count
  end

  opt.on("-h", "--help", "help") do
    puts opt
    exit
  end
end

opt_parser.parse!

# Without a file there is nothing to read; show the usage text instead of
# letting the file read fail on a nil path.
abort opt_parser.to_s unless options[:file]

finder = Find.new(
  file: options[:file],
  lang: options[:lang] || "en",
  # Find#initialize reads the limit from :top_num, so the -c value has to be
  # handed over under that key to take effect.
  top_num: options[:count] || 10
)
p finder.top_topics
|
35
|
+
|
data/lib/wiki_top_page_views.rb
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
|
2
|
+
# Ranks the pages of one language in a page-count dump by number of visits.
#
# Every input line is split on whitespace and read as
# "<project> <title> <visits> ...". Presumably this is the Wikimedia
# pagecounts-raw layout -- TODO confirm against the dumps actually used.
class Find
  # @param opts [Hash] configuration
  #   :lang    - prefix of the project code to keep, case-insensitive (default "en")
  #   :top_num - how many topics to report; Integer or numeric String (default 10)
  #   :count   - alias for :top_num, used when :top_num is absent
  #   :file    - path of the dump file to read
  # @raise [ArgumentError] if the requested count is not a valid integer
  def initialize(opts = {})
    @lang = opts[:lang] || "en"
    # Array#first needs an Integer, and command-line values arrive as Strings.
    @top_num = Integer(opts[:top_num] || opts[:count] || 10)
    @file = opts[:file]
  end

  # Reads the dump file into an array of UTF-8 lines.
  #
  # @return [Array<String>] one entry per line, line endings included
  def open_file
    puts "opening file: #{@file}"
    # File.readlines (unlike IO.readlines) never treats a leading "|" in the
    # path as a command to execute.
    File.readlines(@file).map! do |line|
      # Transcoding from binary replaces every non-ASCII byte with ".".
      line.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '.')
    end
  end

  # Selects the lines of the configured language and splits them into fields.
  #
  # Titles starting with "File" or "Special" are skipped, and lines with fewer
  # than three fields are dropped because they carry no visit count. When a
  # line has five or more fields, fields 1 and 2 are glued together (without a
  # separator) to form the title and the following two fields are kept.
  #
  # @return [Array<Array<String>>] rows of [project, title, visits, ...]
  def find_lang
    lines = open_file
    puts "finding language #{@lang}"
    # Escape the language so it is matched literally, not as a pattern.
    # NOTE(review): this is still a prefix match, so "en" also keeps project
    # codes such as "en.b" -- confirm whether an exact match is wanted.
    wanted = /\A#{Regexp.escape(@lang)}/i
    rows = lines.select { |line| line =~ wanted }.map { |line| line.split(" ") }
    rows = rows.select do |fields|
      fields.size >= 3 && fields[1] !~ /\AFile|\ASpecial/i
    end

    spaced, plain = rows.partition { |fields| fields[4] }
    merged = spaced.map do |fields|
      [fields[0], fields[1] + fields[2], fields[3], fields[4]]
    end
    plain.concat(merged)
  end

  # Orders the rows of the configured language by visit count, highest first.
  #
  # @return [Array<Array<String>>]
  def sort_array
    rows = find_lang
    puts "sorting file"
    rows.sort_by { |fields| -fields[2].to_i }
  end

  # Builds the report lines for the most visited topics.
  #
  # @return [Array<String>] at most @top_num strings of the form
  #   "Topic: <title>, Visits: <count>"
  def top_topics
    ranked = sort_array
    puts "getting top #{@top_num}"
    ranked.first(@top_num).map do |fields|
      "Topic: #{fields[1]}, Visits: #{fields[2]}"
    end
  end
end
|
42
|
+
|
43
|
+
|
44
|
+
|
45
|
+
|
metadata
ADDED
@@ -0,0 +1,89 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: wiki_top_page_views
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Katie Atrops
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-12-10 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rspec
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - '>='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 3.1.0
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '>='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 3.1.0
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 10.4.2
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 10.4.2
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec-encoding-matchers
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 0.1.0
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 0.1.0
|
55
|
+
description: This gem parses data from files at http://dumps.wikimedia.org/other/pagecounts-raw/
|
56
|
+
to get the top viewed pages
|
57
|
+
email: katrops@gmail.com
|
58
|
+
executables:
|
59
|
+
- top_page
|
60
|
+
extensions: []
|
61
|
+
extra_rdoc_files: []
|
62
|
+
files:
|
63
|
+
- bin/top_page
|
64
|
+
- lib/wiki_top_page_views.rb
|
65
|
+
homepage: http://rubygems.org/gems/wiki_top_page_views
|
66
|
+
licenses:
|
67
|
+
- MIT
|
68
|
+
metadata: {}
|
69
|
+
post_install_message:
|
70
|
+
rdoc_options: []
|
71
|
+
require_paths:
|
72
|
+
- lib
|
73
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
74
|
+
requirements:
|
75
|
+
- - '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
78
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - '>='
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
requirements: []
|
84
|
+
rubyforge_project:
|
85
|
+
rubygems_version: 2.2.2
|
86
|
+
signing_key:
|
87
|
+
specification_version: 4
|
88
|
+
summary: Returns the top viewed wikipedia pages
|
89
|
+
test_files: []
|