rika 0.9.1-java → 0.9.2-java
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +7 -3
- data/Rakefile +5 -0
- data/lib/rika.rb +48 -50
- data/lib/rika/version.rb +1 -1
- data/pom.xml +20 -0
- data/spec/fixtures/over_100k_file.txt +1241 -0
- data/spec/fixtures/text_file.txt +1 -1
- data/spec/fixtures/text_file_without_extension +1 -0
- data/spec/rika_spec.rb +18 -3
- data/{lib → target/dependency}/apache-mime4j-core-0.7.2.jar +0 -0
- data/{lib → target/dependency}/apache-mime4j-dom-0.7.2.jar +0 -0
- data/{lib → target/dependency}/asm-3.1.jar +0 -0
- data/{lib → target/dependency}/aspectjrt-1.6.11.jar +0 -0
- data/{lib → target/dependency}/bcmail-jdk15-1.45.jar +0 -0
- data/{lib → target/dependency}/bcprov-jdk15-1.45.jar +0 -0
- data/{lib → target/dependency}/boilerpipe-1.1.0.jar +0 -0
- data/{lib → target/dependency}/commons-codec-1.5.jar +0 -0
- data/{lib → target/dependency}/commons-compress-1.4.1.jar +0 -0
- data/{lib → target/dependency}/commons-logging-1.1.1.jar +0 -0
- data/{lib → target/dependency}/dom4j-1.6.1.jar +0 -0
- data/{lib → target/dependency}/fontbox-1.7.0.jar +0 -0
- data/target/dependency/geronimo-stax-api_1.0_spec-1.0.1.jar +0 -0
- data/{lib → target/dependency}/isoparser-1.0-RC-1.jar +0 -0
- data/target/dependency/jdom-1.0.jar +0 -0
- data/{lib → target/dependency}/jempbox-1.7.0.jar +0 -0
- data/{lib → target/dependency}/juniversalchardet-1.0.3.jar +0 -0
- data/{lib → target/dependency}/metadata-extractor-2.4.0-beta-1.jar +0 -0
- data/target/dependency/netcdf-4.2-min.jar +0 -0
- data/{lib → target/dependency}/pdfbox-1.7.0.jar +0 -0
- data/{lib → target/dependency}/poi-3.8.jar +0 -0
- data/{lib → target/dependency}/poi-ooxml-3.8.jar +0 -0
- data/{lib → target/dependency}/poi-ooxml-schemas-3.8.jar +0 -0
- data/{lib → target/dependency}/poi-scratchpad-3.8.jar +0 -0
- data/{lib → target/dependency}/rome-0.9.jar +0 -0
- data/target/dependency/slf4j-api-1.5.6.jar +0 -0
- data/{lib → target/dependency}/tagsoup-1.2.1.jar +0 -0
- data/{lib → target/dependency}/tika-core-1.2.jar +0 -0
- data/{lib → target/dependency}/tika-parsers-1.2.jar +0 -0
- data/{lib → target/dependency}/vorbis-java-core-0.1-tests.jar +0 -0
- data/{lib → target/dependency}/vorbis-java-core-0.1.jar +0 -0
- data/{lib → target/dependency}/vorbis-java-tika-0.1.jar +0 -0
- data/{lib → target/dependency}/xmlbeans-2.3.0.jar +0 -0
- data/{lib → target/dependency}/xz-1.0.jar +0 -0
- metadata +41 -32
data/spec/fixtures/text_file.txt
CHANGED
@@ -1 +1 @@
|
|
1
|
-
First they ignore you, then they ridicule you, then they fight you, then you win.
|
1
|
+
First they ignore you, then they ridicule you, then they fight you, then you win.
|
data/spec/fixtures/text_file_without_extension
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
First they ignore you, then they ridicule you, then they fight you, then you win.
|
data/spec/rika_spec.rb
CHANGED
@@ -10,26 +10,41 @@ describe Rika::Parser do
|
|
10
10
|
@image_parser = Rika::Parser.new(file_path("image.jpg"))
|
11
11
|
end
|
12
12
|
|
13
|
-
it "should
|
13
|
+
it "should raise error if file does not exists" do
|
14
14
|
lambda { Rika::Parser.new(file_path("nonsense.txt")) }.should raise_error(IOError, "File does not exist")
|
15
15
|
end
|
16
16
|
|
17
|
+
it "should detect file type without a file extension" do
|
18
|
+
parser = Rika::Parser.new(file_path("text_file_without_extension"))
|
19
|
+
parser.metadata["Content-Type"].should == "text/plain; charset=ISO-8859-1"
|
20
|
+
end
|
21
|
+
|
17
22
|
describe '#content' do
|
18
23
|
it "should return the content in a text file" do
|
19
24
|
@txt_parser.content.strip.should == "First they ignore you, then they ridicule you, then they fight you, then you win."
|
20
25
|
end
|
21
26
|
|
22
27
|
it "should return the content in a docx file" do
|
23
|
-
@docx_parser.content.
|
28
|
+
@docx_parser.content.should == "First they ignore you, then they ridicule you, then they fight you, then you win."
|
24
29
|
end
|
25
30
|
|
26
31
|
it "should return the content in a pdf file" do
|
27
|
-
@pdf_parser.content.
|
32
|
+
@pdf_parser.content.should == "First they ignore you, then they ridicule you, then they fight you, then you win."
|
28
33
|
end
|
29
34
|
|
30
35
|
it "should return no content for an image" do
|
31
36
|
@image_parser.content.should be_empty
|
32
37
|
end
|
38
|
+
|
39
|
+
it "should only return max content length" do
|
40
|
+
parser = Rika::Parser.new(file_path("text_file.txt"), 5)
|
41
|
+
parser.content.should == "First"
|
42
|
+
end
|
43
|
+
|
44
|
+
it "should be possible to read files over 100k by default" do
|
45
|
+
parser = Rika::Parser.new(file_path("over_100k_file.txt"))
|
46
|
+
parser.content.length.should == 101_761
|
47
|
+
end
|
33
48
|
end
|
34
49
|
|
35
50
|
# We just test a few of the metadata fields for some common file formats
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
Binary file
|
File without changes
|
File without changes
|
Binary file
|
File without changes
|
Binary file
|
File without changes
|
File without changes
|
File without changes
|
Binary file
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
Binary file
|
File without changes
|
Binary file
|
Binary file
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
metadata
CHANGED
@@ -2,14 +2,14 @@
|
|
2
2
|
name: rika
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 0.9.1
|
5
|
+
version: 0.9.2
|
6
6
|
platform: java
|
7
7
|
authors:
|
8
8
|
- Richard Nyström
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-09-
|
12
|
+
date: 2012-09-17 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec
|
@@ -43,46 +43,53 @@ files:
|
|
43
43
|
- LICENSE.txt
|
44
44
|
- README.md
|
45
45
|
- Rakefile
|
46
|
-
- lib/apache-mime4j-core-0.7.2.jar
|
47
|
-
- lib/apache-mime4j-dom-0.7.2.jar
|
48
|
-
- lib/asm-3.1.jar
|
49
|
-
- lib/aspectjrt-1.6.11.jar
|
50
|
-
- lib/bcmail-jdk15-1.45.jar
|
51
|
-
- lib/bcprov-jdk15-1.45.jar
|
52
|
-
- lib/boilerpipe-1.1.0.jar
|
53
|
-
- lib/commons-codec-1.5.jar
|
54
|
-
- lib/commons-compress-1.4.1.jar
|
55
|
-
- lib/commons-logging-1.1.1.jar
|
56
|
-
- lib/dom4j-1.6.1.jar
|
57
|
-
- lib/fontbox-1.7.0.jar
|
58
|
-
- lib/isoparser-1.0-RC-1.jar
|
59
|
-
- lib/jempbox-1.7.0.jar
|
60
|
-
- lib/juniversalchardet-1.0.3.jar
|
61
|
-
- lib/metadata-extractor-2.4.0-beta-1.jar
|
62
|
-
- lib/pdfbox-1.7.0.jar
|
63
|
-
- lib/poi-3.8.jar
|
64
|
-
- lib/poi-ooxml-3.8.jar
|
65
|
-
- lib/poi-ooxml-schemas-3.8.jar
|
66
|
-
- lib/poi-scratchpad-3.8.jar
|
67
46
|
- lib/rika.rb
|
68
47
|
- lib/rika/version.rb
|
69
|
-
- lib/rome-0.9.jar
|
70
|
-
- lib/tagsoup-1.2.1.jar
|
71
|
-
- lib/tika-core-1.2.jar
|
72
|
-
- lib/tika-parsers-1.2.jar
|
73
|
-
- lib/vorbis-java-core-0.1-tests.jar
|
74
|
-
- lib/vorbis-java-core-0.1.jar
|
75
|
-
- lib/vorbis-java-tika-0.1.jar
|
76
|
-
- lib/xmlbeans-2.3.0.jar
|
77
|
-
- lib/xz-1.0.jar
|
48
|
+
- pom.xml
|
78
49
|
- rika.gemspec
|
79
50
|
- spec/fixtures/document.doc
|
80
51
|
- spec/fixtures/document.docx
|
81
52
|
- spec/fixtures/document.pdf
|
82
53
|
- spec/fixtures/image.jpg
|
54
|
+
- spec/fixtures/over_100k_file.txt
|
83
55
|
- spec/fixtures/text_file.txt
|
56
|
+
- spec/fixtures/text_file_without_extension
|
84
57
|
- spec/rika_spec.rb
|
85
58
|
- spec/spec_helper.rb
|
59
|
+
- target/dependency/apache-mime4j-core-0.7.2.jar
|
60
|
+
- target/dependency/apache-mime4j-dom-0.7.2.jar
|
61
|
+
- target/dependency/asm-3.1.jar
|
62
|
+
- target/dependency/aspectjrt-1.6.11.jar
|
63
|
+
- target/dependency/bcmail-jdk15-1.45.jar
|
64
|
+
- target/dependency/bcprov-jdk15-1.45.jar
|
65
|
+
- target/dependency/boilerpipe-1.1.0.jar
|
66
|
+
- target/dependency/commons-codec-1.5.jar
|
67
|
+
- target/dependency/commons-compress-1.4.1.jar
|
68
|
+
- target/dependency/commons-logging-1.1.1.jar
|
69
|
+
- target/dependency/dom4j-1.6.1.jar
|
70
|
+
- target/dependency/fontbox-1.7.0.jar
|
71
|
+
- target/dependency/geronimo-stax-api_1.0_spec-1.0.1.jar
|
72
|
+
- target/dependency/isoparser-1.0-RC-1.jar
|
73
|
+
- target/dependency/jdom-1.0.jar
|
74
|
+
- target/dependency/jempbox-1.7.0.jar
|
75
|
+
- target/dependency/juniversalchardet-1.0.3.jar
|
76
|
+
- target/dependency/metadata-extractor-2.4.0-beta-1.jar
|
77
|
+
- target/dependency/netcdf-4.2-min.jar
|
78
|
+
- target/dependency/pdfbox-1.7.0.jar
|
79
|
+
- target/dependency/poi-3.8.jar
|
80
|
+
- target/dependency/poi-ooxml-3.8.jar
|
81
|
+
- target/dependency/poi-ooxml-schemas-3.8.jar
|
82
|
+
- target/dependency/poi-scratchpad-3.8.jar
|
83
|
+
- target/dependency/rome-0.9.jar
|
84
|
+
- target/dependency/slf4j-api-1.5.6.jar
|
85
|
+
- target/dependency/tagsoup-1.2.1.jar
|
86
|
+
- target/dependency/tika-core-1.2.jar
|
87
|
+
- target/dependency/tika-parsers-1.2.jar
|
88
|
+
- target/dependency/vorbis-java-core-0.1-tests.jar
|
89
|
+
- target/dependency/vorbis-java-core-0.1.jar
|
90
|
+
- target/dependency/vorbis-java-tika-0.1.jar
|
91
|
+
- target/dependency/xmlbeans-2.3.0.jar
|
92
|
+
- target/dependency/xz-1.0.jar
|
86
93
|
homepage: https://github.com/ricn/rika
|
87
94
|
licenses: []
|
88
95
|
post_install_message:
|
@@ -114,6 +121,8 @@ test_files:
|
|
114
121
|
- spec/fixtures/document.docx
|
115
122
|
- spec/fixtures/document.pdf
|
116
123
|
- spec/fixtures/image.jpg
|
124
|
+
- spec/fixtures/over_100k_file.txt
|
117
125
|
- spec/fixtures/text_file.txt
|
126
|
+
- spec/fixtures/text_file_without_extension
|
118
127
|
- spec/rika_spec.rb
|
119
128
|
- spec/spec_helper.rb
|