kcna 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 6e8215fbfaf2abdef2e8fd42f3e9b11cce17b762
4
+ data.tar.gz: b8214218bb1104dba3529e3c20c610aa1aa72804
5
+ SHA512:
6
+ metadata.gz: 8e1d84e5676bacc5a107cc4ad29680eced2c56a1cf8120bd4a3af8c6783d216a9942d9780c3262c4fdb5c1c11b535af97b85c3d72dd66d91d4a81bcafc2127dc
7
+ data.tar.gz: 12a957382d0fcfa5d207d5788fd5af82ab974f34d9a16eacf233b556722fb34e88cb456628e5104ec3fd9332bf010423d6120a3833432478a3d3e4f2b821f179
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2017 Hinata
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,2 @@
1
+ # kcna.rb
2
+ A Ruby gem for kcna.kp (KCNA, the Korean Central News Agency)
data/lib/kcna.rb ADDED
@@ -0,0 +1,112 @@
1
+ require "kcna/version"
2
+ require "kcna/article"
3
+ require "httpclient"
4
+ require "date"
5
+ require "rexml/document"
6
+
7
# Client for the kcna.kp (KCNA, Korean Central News Agency) web endpoints.
#
# Every request is an HTTP POST sent through an HTTPClient instance; the XML
# bodies that come back are parsed with REXML and turned into Article objects.
class KCNA
  # Language codes; presumably the values the site expects for the `lang`
  # field sent by #set_language (verify against the site).
  KO = "kor"
  EN = "eng"
  ZH = "chn"
  RU = "rus"
  ES = "spn"
  JA = "jpn"

  # Emphasis markup (<nobr><strong><font ...>NAME</font></strong></nobr>) that
  # is replaced by its inner text. `[^>]*` and the lazy `(.*?)` keep each match
  # confined to one wrapper, so several wrapped names on the same line are
  # unwrapped individually instead of being merged into one greedy match.
  GREAT_LEADER_PATTERN = %r{<nobr><strong><font[^>]*>(.*?)</font></strong></nobr>}
  # Everything #normalize_text rewrites in a single pass. The only capture
  # group in this union is the one inside GREAT_LEADER_PATTERN.
  NORMALIZE_PATTERN = Regexp.union("\n", "<br>", "&nbsp;", GREAT_LEADER_PATTERN)
  private_constant :GREAT_LEADER_PATTERN, :NORMALIZE_PATTERN

  def initialize
    @client = HTTPClient.new
  end

  # Strips site markup from a text fragment.
  #
  # Raw newlines and "&nbsp;" are removed, "<br>" becomes a newline, emphasis
  # wrappers are replaced by the text they enclose, and the first "---" /
  # "‐‐‐" found at the end of a line is dropped.
  #
  # @param content [String, nil] raw text taken from a response element
  # @return [String, nil] cleaned text; nil when +content+ is nil (REXML
  #   returns nil as the text of an empty element)
  def normalize_text(content)
    return nil if content.nil?

    cleaned = content.gsub(NORMALIZE_PATTERN) do |match|
      case match
      when "\n", "&nbsp;" then ""
      when "<br>" then "\n"
      else Regexp.last_match(1) # emphasis wrapper: keep only the inner text
      end
    end
    cleaned.sub(/(---|‐‐‐)$/, "")
  end

  # POSTs +body+ to +path+ on kcna.kp and returns the successful response.
  #
  # On a 307 the identical request is sent again (the Location header is not
  # consulted), at most +max_redirect+ times in total.
  # NOTE(review): presumably the site answers the first request of a session
  # with a 307 back to the same URL; confirm before changing this.
  #
  # @raise [RuntimeError] on any other non-success status, or when the
  #   attempt budget is exhausted
  private def post(path, body, max_redirect = 3)
    raise "Too many redirects" if max_redirect <= 0

    res = @client.post("http://kcna.kp#{path}", body: body)
    if res.ok?
      res
    elsif res.redirect?
      raise "Response error: #{res.status}" unless res.status == HTTP::Status::TEMPORARY_REDIRECT
      post(path, body, max_redirect - 1)
    else
      raise "Response error: #{res.status}"
    end
  end

  # Sends the home-info request with +lang+ (see the language constants).
  # NOTE(review): presumably this switches the language used for later
  # requests made through the same HTTPClient session; confirm.
  #
  # @return the HTTP response object returned by the client
  def set_language(lang)
    data = {
      article_code: "", article_type_list: "", news_type_code: "", show_what: "", mediaCode: "",
      lang: lang
    }
    post("/kcna.user.home.retrieveHomeInfoList.kcmsf", data)
  end

  # Returns the raw response body for a single article.
  private def fetch_article(article_id)
    data = { article_code: article_id, kwContent: "" }
    post("/kcna.user.article.retrieveArticleInfoFromArticleCode.kcmsf", data).body
  end

  # Fetches one article and builds an Article from the response XML.
  #
  # @param article_id [String] article code sent to the site
  # @return [KCNA::Article]
  # @raise [RuntimeError] "Article not found" when the response has no
  #   <NData> element or that element has no children
  def get_article(article_id)
    doc = REXML::Document.new(fetch_article(article_id))
    container = REXML::XPath.first(doc, "//NData")
    raise "Article not found" if container.nil? || container.elements.size == 0

    text_of = ->(xpath) { REXML::XPath.first(doc, xpath).text }
    count_of = ->(xpath) { text_of.call(xpath).to_i }

    Article.new(
      id: text_of.call("//articleCode"),
      content: normalize_text(text_of.call("//content")),
      date: Date.strptime(text_of.call("//articleCreateDate"), "%Y.%m.%d"),
      main_title: normalize_text(text_of.call("//mainTitle")),
      sub_title: normalize_text(text_of.call("//subTitle")),
      display_title: normalize_text(text_of.call("//dispTitle")),
      movie_count: count_of.call("//fMovieCnt"),
      photo_count: count_of.call("//fPhotoCnt"),
      music_count: count_of.call("//fMusicCnt")
    )
  end

  # Returns the raw response body for one page of the article list.
  private def fetch_article_list(start, news_type, from_date, to_date)
    data = {
      page_start: start, kwDispTitle: "", keyword: "", newsTypeCode: news_type,
      articleTypeList: "", photoCount: 0, movieCount: 0, kwContent: "",
      fromDate: from_date, toDate: to_date
    }
    post("/kcna.user.article.retrieveArticleListForPage.kcmsf", data).body
  end

  # Fetches one page of the article list.
  #
  # Each field is collected document-wide and the resulting arrays are zipped
  # by position, so the result is only correct when every listed article
  # carries every field.
  #
  # @param start [Integer] value sent as `page_start`
  # @param news_type [String] value sent as `newsTypeCode`
  # @param from_date [String] value sent as `fromDate`
  # @param to_date [String] value sent as `toDate`
  # @return [Array<KCNA::Article>] articles without content
  def get_article_list(start = 0, news_type: "", from_date: "", to_date: "")
    doc = REXML::Document.new(fetch_article_list(start, news_type, from_date, to_date))
    texts = ->(xpath) { REXML::XPath.match(doc, xpath).map(&:text) }
    titles = ->(xpath) { texts.call(xpath).map { |text| normalize_text(text) } }
    counts = ->(xpath) { texts.call(xpath).map(&:to_i) }

    article_ids = texts.call("//articleCode")
    disp_titles = titles.call("//dispTitle")
    main_titles = titles.call("//mainTitle")
    sub_titles = titles.call("//subTitle")
    dates = texts.call("//sendInfo")
    movie_counts = counts.call("//fMovieCnt")
    music_counts = counts.call("//fMusicCnt")
    photo_counts = counts.call("//fPhotoCnt")

    article_ids.zip(
      disp_titles, main_titles, sub_titles, dates,
      movie_counts, music_counts, photo_counts
    ).map do |id, disp, main, sub, date, movie, music, photo|
      # Hard-coded date override for this one article.
      # NOTE(review): presumably the site's date for it cannot be parsed; confirm.
      date = "2015-04-02" if id == "AR0060168"
      Article.new(
        id: id, date: Date.parse(date),
        display_title: disp, main_title: main, sub_title: sub,
        movie_count: movie, music_count: music, photo_count: photo
      )
    end
  end
end
@@ -0,0 +1,21 @@
1
# A single KCNA article: an id plus a bag of optional attributes.
#
# Attributes are kept in the keyword hash given to the constructor and exposed
# through readers that fall back to a per-attribute default when the stored
# value is nil (or was never supplied).
class KCNA::Article
  attr_reader :id

  # Defines a reader named +key+ that returns @attrs[key], or +default+ when
  # that value is nil.
  def self.attr_reader_hash(key, default = nil)
    define_method(key) do
      stored = @attrs[key]
      stored.nil? ? default : stored
    end
  end

  # Reader name => fallback value, declared in the order the readers are defined.
  {
    date: nil,
    content: nil,
    main_title: nil,
    sub_title: "",
    display_title: nil,
    movie_count: 0,
    photo_count: 0,
    music_count: 0
  }.each { |reader, fallback| attr_reader_hash(reader, fallback) }

  # @param id [String] article code
  # @param attrs [Hash] any of the attributes exposed by the readers above
  def initialize(id:, **attrs)
    @attrs = attrs
    @id = id
  end
end
@@ -0,0 +1,3 @@
1
class KCNA
  # Gem version. Frozen so the constant cannot be mutated in place by callers.
  VERSION = "0.1.0".freeze
end
metadata ADDED
@@ -0,0 +1,119 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: kcna
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Hinata
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2017-07-10 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: httpclient
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2.8'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '2.8'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.15'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.15'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '10.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '10.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: minitest
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '5.0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '5.0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: pry
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '0.10'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '0.10'
83
+ description:
84
+ email:
85
+ - syobon.hinata.public@gmail.com
86
+ executables: []
87
+ extensions: []
88
+ extra_rdoc_files: []
89
+ files:
90
+ - LICENSE
91
+ - README.md
92
+ - lib/kcna.rb
93
+ - lib/kcna/article.rb
94
+ - lib/kcna/version.rb
95
+ homepage: https://github.com/hinamiyagk/kcna.rb
96
+ licenses:
97
+ - MIT
98
+ metadata: {}
99
+ post_install_message:
100
+ rdoc_options: []
101
+ require_paths:
102
+ - lib
103
+ required_ruby_version: !ruby/object:Gem::Requirement
104
+ requirements:
105
+ - - ">="
106
+ - !ruby/object:Gem::Version
107
+ version: '0'
108
+ required_rubygems_version: !ruby/object:Gem::Requirement
109
+ requirements:
110
+ - - ">="
111
+ - !ruby/object:Gem::Version
112
+ version: '0'
113
+ requirements: []
114
+ rubyforge_project:
115
+ rubygems_version: 2.6.12
116
+ signing_key:
117
+ specification_version: 4
118
+ summary: A Ruby gem for kcna.kp(KCNA, Korean Central News Agency)
119
+ test_files: []