rika 0.9.1-java → 0.9.2-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +7 -3
- data/Rakefile +5 -0
- data/lib/rika.rb +48 -50
- data/lib/rika/version.rb +1 -1
- data/pom.xml +20 -0
- data/spec/fixtures/over_100k_file.txt +1241 -0
- data/spec/fixtures/text_file.txt +1 -1
- data/spec/fixtures/text_file_without_extension +1 -0
- data/spec/rika_spec.rb +18 -3
- data/{lib → target/dependency}/apache-mime4j-core-0.7.2.jar +0 -0
- data/{lib → target/dependency}/apache-mime4j-dom-0.7.2.jar +0 -0
- data/{lib → target/dependency}/asm-3.1.jar +0 -0
- data/{lib → target/dependency}/aspectjrt-1.6.11.jar +0 -0
- data/{lib → target/dependency}/bcmail-jdk15-1.45.jar +0 -0
- data/{lib → target/dependency}/bcprov-jdk15-1.45.jar +0 -0
- data/{lib → target/dependency}/boilerpipe-1.1.0.jar +0 -0
- data/{lib → target/dependency}/commons-codec-1.5.jar +0 -0
- data/{lib → target/dependency}/commons-compress-1.4.1.jar +0 -0
- data/{lib → target/dependency}/commons-logging-1.1.1.jar +0 -0
- data/{lib → target/dependency}/dom4j-1.6.1.jar +0 -0
- data/{lib → target/dependency}/fontbox-1.7.0.jar +0 -0
- data/target/dependency/geronimo-stax-api_1.0_spec-1.0.1.jar +0 -0
- data/{lib → target/dependency}/isoparser-1.0-RC-1.jar +0 -0
- data/target/dependency/jdom-1.0.jar +0 -0
- data/{lib → target/dependency}/jempbox-1.7.0.jar +0 -0
- data/{lib → target/dependency}/juniversalchardet-1.0.3.jar +0 -0
- data/{lib → target/dependency}/metadata-extractor-2.4.0-beta-1.jar +0 -0
- data/target/dependency/netcdf-4.2-min.jar +0 -0
- data/{lib → target/dependency}/pdfbox-1.7.0.jar +0 -0
- data/{lib → target/dependency}/poi-3.8.jar +0 -0
- data/{lib → target/dependency}/poi-ooxml-3.8.jar +0 -0
- data/{lib → target/dependency}/poi-ooxml-schemas-3.8.jar +0 -0
- data/{lib → target/dependency}/poi-scratchpad-3.8.jar +0 -0
- data/{lib → target/dependency}/rome-0.9.jar +0 -0
- data/target/dependency/slf4j-api-1.5.6.jar +0 -0
- data/{lib → target/dependency}/tagsoup-1.2.1.jar +0 -0
- data/{lib → target/dependency}/tika-core-1.2.jar +0 -0
- data/{lib → target/dependency}/tika-parsers-1.2.jar +0 -0
- data/{lib → target/dependency}/vorbis-java-core-0.1-tests.jar +0 -0
- data/{lib → target/dependency}/vorbis-java-core-0.1.jar +0 -0
- data/{lib → target/dependency}/vorbis-java-tika-0.1.jar +0 -0
- data/{lib → target/dependency}/xmlbeans-2.3.0.jar +0 -0
- data/{lib → target/dependency}/xz-1.0.jar +0 -0
- metadata +41 -32
data/spec/fixtures/text_file.txt
CHANGED
@@ -1 +1 @@
|
|
1
|
-
First they ignore you, then they ridicule you, then they fight you, then you win.
|
1
|
+
First they ignore you, then they ridicule you, then they fight you, then you win.
|
data/spec/fixtures/text_file_without_extension
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
First they ignore you, then they ridicule you, then they fight you, then you win.
|
data/spec/rika_spec.rb
CHANGED
@@ -10,26 +10,41 @@ describe Rika::Parser do
|
|
10
10
|
@image_parser = Rika::Parser.new(file_path("image.jpg"))
|
11
11
|
end
|
12
12
|
|
13
|
-
it "should
|
13
|
+
it "should raise error if file does not exists" do
|
14
14
|
lambda { Rika::Parser.new(file_path("nonsense.txt")) }.should raise_error(IOError, "File does not exist")
|
15
15
|
end
|
16
16
|
|
17
|
+
it "should detect file type without a file extension" do
|
18
|
+
parser = Rika::Parser.new(file_path("text_file_without_extension"))
|
19
|
+
parser.metadata["Content-Type"].should == "text/plain; charset=ISO-8859-1"
|
20
|
+
end
|
21
|
+
|
17
22
|
describe '#content' do
|
18
23
|
it "should return the content in a text file" do
|
19
24
|
@txt_parser.content.strip.should == "First they ignore you, then they ridicule you, then they fight you, then you win."
|
20
25
|
end
|
21
26
|
|
22
27
|
it "should return the content in a docx file" do
|
23
|
-
@docx_parser.content.
|
28
|
+
@docx_parser.content.should == "First they ignore you, then they ridicule you, then they fight you, then you win."
|
24
29
|
end
|
25
30
|
|
26
31
|
it "should return the content in a pdf file" do
|
27
|
-
@pdf_parser.content.
|
32
|
+
@pdf_parser.content.should == "First they ignore you, then they ridicule you, then they fight you, then you win."
|
28
33
|
end
|
29
34
|
|
30
35
|
it "should return no content for an image" do
|
31
36
|
@image_parser.content.should be_empty
|
32
37
|
end
|
38
|
+
|
39
|
+
it "should only return max content length" do
|
40
|
+
parser = Rika::Parser.new(file_path("text_file.txt"), 5)
|
41
|
+
parser.content.should == "First"
|
42
|
+
end
|
43
|
+
|
44
|
+
it "should be possible to read files over 100k by default" do
|
45
|
+
parser = Rika::Parser.new(file_path("over_100k_file.txt"))
|
46
|
+
parser.content.length.should == 101_761
|
47
|
+
end
|
33
48
|
end
|
34
49
|
|
35
50
|
# We just test a few of the metadata fields for some common file formats
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
Binary file
|
File without changes
|
File without changes
|
Binary file
|
File without changes
|
Binary file
|
File without changes
|
File without changes
|
File without changes
|
Binary file
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
Binary file
|
File without changes
|
Binary file
|
Binary file
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
metadata
CHANGED
@@ -2,14 +2,14 @@
|
|
2
2
|
name: rika
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 0.9.1
|
5
|
+
version: 0.9.2
|
6
6
|
platform: java
|
7
7
|
authors:
|
8
8
|
- Richard Nyström
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-09-
|
12
|
+
date: 2012-09-17 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec
|
@@ -43,46 +43,53 @@ files:
|
|
43
43
|
- LICENSE.txt
|
44
44
|
- README.md
|
45
45
|
- Rakefile
|
46
|
-
- lib/apache-mime4j-core-0.7.2.jar
|
47
|
-
- lib/apache-mime4j-dom-0.7.2.jar
|
48
|
-
- lib/asm-3.1.jar
|
49
|
-
- lib/aspectjrt-1.6.11.jar
|
50
|
-
- lib/bcmail-jdk15-1.45.jar
|
51
|
-
- lib/bcprov-jdk15-1.45.jar
|
52
|
-
- lib/boilerpipe-1.1.0.jar
|
53
|
-
- lib/commons-codec-1.5.jar
|
54
|
-
- lib/commons-compress-1.4.1.jar
|
55
|
-
- lib/commons-logging-1.1.1.jar
|
56
|
-
- lib/dom4j-1.6.1.jar
|
57
|
-
- lib/fontbox-1.7.0.jar
|
58
|
-
- lib/isoparser-1.0-RC-1.jar
|
59
|
-
- lib/jempbox-1.7.0.jar
|
60
|
-
- lib/juniversalchardet-1.0.3.jar
|
61
|
-
- lib/metadata-extractor-2.4.0-beta-1.jar
|
62
|
-
- lib/pdfbox-1.7.0.jar
|
63
|
-
- lib/poi-3.8.jar
|
64
|
-
- lib/poi-ooxml-3.8.jar
|
65
|
-
- lib/poi-ooxml-schemas-3.8.jar
|
66
|
-
- lib/poi-scratchpad-3.8.jar
|
67
46
|
- lib/rika.rb
|
68
47
|
- lib/rika/version.rb
|
69
|
-
- lib/rome-0.9.jar
|
70
|
-
- lib/tagsoup-1.2.1.jar
|
71
|
-
- lib/tika-core-1.2.jar
|
72
|
-
- lib/tika-parsers-1.2.jar
|
73
|
-
- lib/vorbis-java-core-0.1-tests.jar
|
74
|
-
- lib/vorbis-java-core-0.1.jar
|
75
|
-
- lib/vorbis-java-tika-0.1.jar
|
76
|
-
- lib/xmlbeans-2.3.0.jar
|
77
|
-
- lib/xz-1.0.jar
|
48
|
+
- pom.xml
|
78
49
|
- rika.gemspec
|
79
50
|
- spec/fixtures/document.doc
|
80
51
|
- spec/fixtures/document.docx
|
81
52
|
- spec/fixtures/document.pdf
|
82
53
|
- spec/fixtures/image.jpg
|
54
|
+
- spec/fixtures/over_100k_file.txt
|
83
55
|
- spec/fixtures/text_file.txt
|
56
|
+
- spec/fixtures/text_file_without_extension
|
84
57
|
- spec/rika_spec.rb
|
85
58
|
- spec/spec_helper.rb
|
59
|
+
- target/dependency/apache-mime4j-core-0.7.2.jar
|
60
|
+
- target/dependency/apache-mime4j-dom-0.7.2.jar
|
61
|
+
- target/dependency/asm-3.1.jar
|
62
|
+
- target/dependency/aspectjrt-1.6.11.jar
|
63
|
+
- target/dependency/bcmail-jdk15-1.45.jar
|
64
|
+
- target/dependency/bcprov-jdk15-1.45.jar
|
65
|
+
- target/dependency/boilerpipe-1.1.0.jar
|
66
|
+
- target/dependency/commons-codec-1.5.jar
|
67
|
+
- target/dependency/commons-compress-1.4.1.jar
|
68
|
+
- target/dependency/commons-logging-1.1.1.jar
|
69
|
+
- target/dependency/dom4j-1.6.1.jar
|
70
|
+
- target/dependency/fontbox-1.7.0.jar
|
71
|
+
- target/dependency/geronimo-stax-api_1.0_spec-1.0.1.jar
|
72
|
+
- target/dependency/isoparser-1.0-RC-1.jar
|
73
|
+
- target/dependency/jdom-1.0.jar
|
74
|
+
- target/dependency/jempbox-1.7.0.jar
|
75
|
+
- target/dependency/juniversalchardet-1.0.3.jar
|
76
|
+
- target/dependency/metadata-extractor-2.4.0-beta-1.jar
|
77
|
+
- target/dependency/netcdf-4.2-min.jar
|
78
|
+
- target/dependency/pdfbox-1.7.0.jar
|
79
|
+
- target/dependency/poi-3.8.jar
|
80
|
+
- target/dependency/poi-ooxml-3.8.jar
|
81
|
+
- target/dependency/poi-ooxml-schemas-3.8.jar
|
82
|
+
- target/dependency/poi-scratchpad-3.8.jar
|
83
|
+
- target/dependency/rome-0.9.jar
|
84
|
+
- target/dependency/slf4j-api-1.5.6.jar
|
85
|
+
- target/dependency/tagsoup-1.2.1.jar
|
86
|
+
- target/dependency/tika-core-1.2.jar
|
87
|
+
- target/dependency/tika-parsers-1.2.jar
|
88
|
+
- target/dependency/vorbis-java-core-0.1-tests.jar
|
89
|
+
- target/dependency/vorbis-java-core-0.1.jar
|
90
|
+
- target/dependency/vorbis-java-tika-0.1.jar
|
91
|
+
- target/dependency/xmlbeans-2.3.0.jar
|
92
|
+
- target/dependency/xz-1.0.jar
|
86
93
|
homepage: https://github.com/ricn/rika
|
87
94
|
licenses: []
|
88
95
|
post_install_message:
|
@@ -114,6 +121,8 @@ test_files:
|
|
114
121
|
- spec/fixtures/document.docx
|
115
122
|
- spec/fixtures/document.pdf
|
116
123
|
- spec/fixtures/image.jpg
|
124
|
+
- spec/fixtures/over_100k_file.txt
|
117
125
|
- spec/fixtures/text_file.txt
|
126
|
+
- spec/fixtures/text_file_without_extension
|
118
127
|
- spec/rika_spec.rb
|
119
128
|
- spec/spec_helper.rb
|