chupa-text-decomposer-html 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0939cf7f2e60fa69abd6b02daaa28d90f5b4642a
4
- data.tar.gz: 60a4a84981934bad89c73fb07d573a7cddeae264
3
+ metadata.gz: 75be13613ba030b332f565e1e569c9e15a75e111
4
+ data.tar.gz: 0ad922b353bffb19819340d6ffb70ce739e51fc0
5
5
  SHA512:
6
- metadata.gz: cbf1a116f7eb7eb38401e278433be037311ef7b217bdaa8a846dfcafc15ae102e1af14b543145fdc4a7333720c93c9e962f60ef08534c853e012003ae2816ada
7
- data.tar.gz: 3239708ec5f31793130c6bd0dbd602908860e23ad4cd3c5d4e7775cb3a35d061d6da399cda573cea700eebd906e8d42b80d0939d02a3f1047cde8ccc8edf4240
6
+ metadata.gz: 5f00a62cb2156eab85ab87e4a1f50aee301fe17d831965cc1a2b7fe0b6bbde6b67f8b108a16999d6ecad3df7349b84a1d8ef7103901cfdfde0ba1ad45c5c8235
7
+ data.tar.gz: 975b69746801761f2872921e83c95bd1c309f01a6ae9703be404ec81b1b92cb359049dab3aa74cc37d603d0c71734fa30ed29715489298c0c70514f8b7534e6f
@@ -22,14 +22,15 @@ end
22
22
 
23
23
  Gem::Specification.new do |spec|
24
24
  spec.name = "chupa-text-decomposer-html"
25
- spec.version = "1.0.0"
25
+ spec.version = "1.0.1"
26
26
  spec.homepage = "https://github.com/ranguba/chupa-text-decomposer-html"
27
27
  spec.authors = ["Kouhei Sutou"]
28
28
  spec.email = ["kou@clear-code.com"]
29
29
  readme = File.read("README.md", :encoding => "UTF-8")
30
30
  entries = readme.split(/^\#\#\s(.*)$/)
31
31
  description = clean_white_space.call(entries[entries.index("Description") + 1])
32
- spec.summary, spec.description, = description.split(/\n\n+/, 3)
32
+ spec.summary = description.split(/\n\n+/, 2).first
33
+ spec.description = description
33
34
  spec.license = "LGPLv2.1 or later"
34
35
  spec.files = ["#{spec.name}.gemspec"]
35
36
  spec.files += ["README.md", "LICENSE.txt", "Rakefile", "Gemfile"]
@@ -1,5 +1,9 @@
1
1
  # News
2
2
 
3
+ ## 1.0.1: 2014-02-18
4
+
5
+ * Support chupa-text 1.0.4.
6
+
3
7
  ## 1.0.0: 2014-01-05
4
8
 
5
9
  The first release!!!
@@ -43,10 +43,13 @@ module ChupaText
43
43
  end
44
44
  decomposed_data = TextData.new(body)
45
45
  decomposed_data.uri = data.uri
46
+
47
+ attributes = decomposed_data.attributes
46
48
  title_element = (doc % "head/title")
47
- decomposed_data["title"] = title_element.text if title_element
49
+ attributes.title = title_element.text if title_element
48
50
  encoding = doc.encoding
49
- decomposed_data["encoding"] = encoding if encoding
51
+ attributes.encoding = encoding if encoding
52
+
50
53
  yield(decomposed_data)
51
54
  end
52
55
 
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
1
+ # Copyright (C) 2013-2014 Kouhei Sutou <kou@clear-code.com>
2
2
  #
3
3
  # This library is free software; you can redistribute it and/or
4
4
  # modify it under the terms of the GNU Lesser General Public
@@ -143,7 +143,7 @@ class TestHTML < Test::Unit::TestCase
143
143
  <body>Hello</body>
144
144
  </html>
145
145
  HTML
146
- assert_equal(["US-ASCII"], decompose(@data))
146
+ assert_equal([Encoding::US_ASCII], decompose(@data))
147
147
  end
148
148
 
149
149
  def test_xml_declaration
@@ -155,7 +155,7 @@ class TestHTML < Test::Unit::TestCase
155
155
  <body>Hello</body>
156
156
  </html>
157
157
  XHTML
158
- assert_equal(["Shift_JIS"], decompose(@data))
158
+ assert_equal([Encoding::Shift_JIS], decompose(@data))
159
159
  end
160
160
 
161
161
  def test_content_type
@@ -167,7 +167,7 @@ class TestHTML < Test::Unit::TestCase
167
167
  <body>Hello</body>
168
168
  </html>
169
169
  HTML
170
- assert_equal(["EUC-JP"], decompose(@data))
170
+ assert_equal([Encoding::EUC_JP], decompose(@data))
171
171
  end
172
172
 
173
173
  def test_meta_charset
@@ -179,7 +179,7 @@ class TestHTML < Test::Unit::TestCase
179
179
  <body>Hello</body>
180
180
  </html>
181
181
  HTML5
182
- assert_equal(["EUC-JP"], decompose(@data))
182
+ assert_equal([Encoding::EUC_JP], decompose(@data))
183
183
  end
184
184
  end
185
185
 
@@ -197,15 +197,15 @@ class TestHTML < Test::Unit::TestCase
197
197
  end
198
198
 
199
199
  def test_x_sjis
200
- assert_equal(["Windows-31J"], decompose("x-sjis"))
200
+ assert_equal([Encoding::WINDOWS_31J], decompose("x-sjis"))
201
201
  end
202
202
 
203
203
  def test_shift_jis_hyphen
204
- assert_equal(["Windows-31J"], decompose("Shift-JIS"))
204
+ assert_equal([Encoding::WINDOWS_31J], decompose("Shift-JIS"))
205
205
  end
206
206
 
207
207
  def test_shift_jis_under_score
208
- assert_equal(["Windows-31J"], decompose("Shift_JIS"))
208
+ assert_equal([Encoding::WINDOWS_31J], decompose("Shift_JIS"))
209
209
  end
210
210
  end
211
211
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chupa-text-decomposer-html
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kouhei Sutou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-01-05 00:00:00.000000000 Z
11
+ date: 2014-02-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: chupa-text
@@ -109,6 +109,9 @@ dependencies:
109
109
  - !ruby/object:Gem::Version
110
110
  version: '0'
111
111
  description: |
112
+ This is a ChupaText decomposer plugin to extract text and
113
+ meta-data from HTML.
114
+
112
115
  You can use `html` decomposer.
113
116
  email:
114
117
  - kou@clear-code.com