keynote-extractor 0.1.0 → 0.1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a1491658cd7e58721cfaeabe9030b257f3037f8c
4
- data.tar.gz: 003b63def25af52ecec9b0e31ee6393d54fca168
3
+ metadata.gz: 0bd676fb1b144a046ba94028627fe850b95c02d1
4
+ data.tar.gz: 442558386e41aebe40b41c835951c8468db3dd46
5
5
  SHA512:
6
- metadata.gz: 4ecf90175893095e4631a1dd9d1da84a9633fd934848d07fc2d1fcd9fd3b29082985d191462e5fc49ce3ac9f671babc146d3d69a145d77908c108277f700549b
7
- data.tar.gz: 28c37450e24cea4c72cd4761a717247f2d8f6ad23952cbf39c43df701a6e1af1b1175ebca6bb55def7d8be40213553b5d93fa64dd48adca6873cc5d2aa20fdc6
6
+ metadata.gz: 50c069aeae316b188476ab4f284b9377100f69047dd1cdc7fe5425fef4102b9279a60af24ffb0d1722d485c416da14284676aa338dd765d2c75b896ea8589254
7
+ data.tar.gz: 5e55dcb6d5c5d9cfd8f366f434b047de93495fd3d9a13ac0eaaf862744120a73ee1c840ad20bdba193c1d713c437a2a3c9b310cf381871715f0ea480b4c417a6
@@ -24,6 +24,6 @@ Gem::Specification.new do |spec|
24
24
  spec.add_development_dependency "bundler", "~> 1.13"
25
25
  spec.add_development_dependency "rake", "~> 10.0"
26
26
  spec.add_development_dependency "rspec", "~> 3.0"
27
- spec.add_development_dependency "nokogiri"
28
- spec.add_development_dependency "rubyzip"
27
+ spec.add_development_dependency "nokogiri", "1.7.0"
28
+ spec.add_development_dependency "rubyzip", "1.2.0"
29
29
  end
@@ -6,7 +6,7 @@ module Keynote
6
6
  module Extractor
7
7
  class Parse
8
8
  def self.text(file)
9
- content = ArchiveExtractor.return_index(file)
9
+ content = ArchiveExtractor.return_files(file)
10
10
  TextExtractor.get_text(content)
11
11
  end
12
12
  end
@@ -8,15 +8,19 @@ module Keynote
8
8
  archive = Zip::File.open(file)
9
9
  end
10
10
 
11
- def self.return_index(file)
11
+ def self.return_files(file)
12
12
  archive = extract(file)
13
13
 
14
+ iwa_files = []
14
15
  archive.each do |item|
15
16
  if item.name == 'index.apxl'
16
17
  return item.get_input_stream.read
17
18
  end
19
+ iwa_files << item.get_input_stream.read if item.name.split('.').last == 'iwa'
18
20
  end
19
- end
21
+
22
+ iwa_files
23
+ end
20
24
  end
21
25
  end
22
26
  end
@@ -6,11 +6,20 @@ module Keynote
6
6
  class TextExtractor
7
7
  def self.get_text(content)
8
8
  text = []
9
- text_nodes = search_for_bodies(content)
10
- text_nodes.each do |tn|
11
- content = get_from_bodies(tn)
12
- text << content unless content.empty?
13
- end
9
+ if content.kind_of?(Array)
10
+ content.each do |c|
11
+ extracted = extract_iwa(c)
12
+ extracted.each do |word|
13
+ text << word
14
+ end
15
+ end
16
+ else
17
+ text_nodes = search_for_bodies(content)
18
+ text_nodes.each do |tn|
19
+ content = get_from_bodies(tn)
20
+ text << content unless content.empty?
21
+ end
22
+ end
14
23
 
15
24
  text.uniq
16
25
  end
@@ -23,6 +32,17 @@ module Keynote
23
32
  def self.get_from_bodies(body_text)
24
33
  body_text.child.content
25
34
  end
35
+
36
+ def self.extract_iwa(file)
37
+ res = []
38
+ words = file.encode!('UTF-8', 'UTF-8', :invalid => :replace)
39
+ .gsub(/[^0-9a-z ]/i, '').split(" ")
40
+ words.each do |w|
41
+ res << w unless w.length > 20 or w.include? "Transition" or w.length <= 2
42
+ end
43
+
44
+ res
45
+ end
26
46
  end
27
47
  end
28
48
  end
@@ -1,5 +1,5 @@
1
1
  module Keynote
2
2
  module Extractor
3
- VERSION = "0.1.0"
3
+ VERSION = "0.1.1.1"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: keynote-extractor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Cem Baykam
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-01-27 00:00:00.000000000 Z
11
+ date: 2017-01-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -56,30 +56,30 @@ dependencies:
56
56
  name: nokogiri
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - ">="
59
+ - - '='
60
60
  - !ruby/object:Gem::Version
61
- version: '0'
61
+ version: 1.7.0
62
62
  type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
- - - ">="
66
+ - - '='
67
67
  - !ruby/object:Gem::Version
68
- version: '0'
68
+ version: 1.7.0
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: rubyzip
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
- - - ">="
73
+ - - '='
74
74
  - !ruby/object:Gem::Version
75
- version: '0'
75
+ version: 1.2.0
76
76
  type: :development
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
- - - ">="
80
+ - - '='
81
81
  - !ruby/object:Gem::Version
82
- version: '0'
82
+ version: 1.2.0
83
83
  description: Extracts texts and images from apple keynote files
84
84
  email:
85
85
  - cbaykam@gmail.com