rika 0.9.1-java → 0.9.2-java

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44)
  1. data/README.md +7 -3
  2. data/Rakefile +5 -0
  3. data/lib/rika.rb +48 -50
  4. data/lib/rika/version.rb +1 -1
  5. data/pom.xml +20 -0
  6. data/spec/fixtures/over_100k_file.txt +1241 -0
  7. data/spec/fixtures/text_file.txt +1 -1
  8. data/spec/fixtures/text_file_without_extension +1 -0
  9. data/spec/rika_spec.rb +18 -3
  10. data/{lib → target/dependency}/apache-mime4j-core-0.7.2.jar +0 -0
  11. data/{lib → target/dependency}/apache-mime4j-dom-0.7.2.jar +0 -0
  12. data/{lib → target/dependency}/asm-3.1.jar +0 -0
  13. data/{lib → target/dependency}/aspectjrt-1.6.11.jar +0 -0
  14. data/{lib → target/dependency}/bcmail-jdk15-1.45.jar +0 -0
  15. data/{lib → target/dependency}/bcprov-jdk15-1.45.jar +0 -0
  16. data/{lib → target/dependency}/boilerpipe-1.1.0.jar +0 -0
  17. data/{lib → target/dependency}/commons-codec-1.5.jar +0 -0
  18. data/{lib → target/dependency}/commons-compress-1.4.1.jar +0 -0
  19. data/{lib → target/dependency}/commons-logging-1.1.1.jar +0 -0
  20. data/{lib → target/dependency}/dom4j-1.6.1.jar +0 -0
  21. data/{lib → target/dependency}/fontbox-1.7.0.jar +0 -0
  22. data/target/dependency/geronimo-stax-api_1.0_spec-1.0.1.jar +0 -0
  23. data/{lib → target/dependency}/isoparser-1.0-RC-1.jar +0 -0
  24. data/target/dependency/jdom-1.0.jar +0 -0
  25. data/{lib → target/dependency}/jempbox-1.7.0.jar +0 -0
  26. data/{lib → target/dependency}/juniversalchardet-1.0.3.jar +0 -0
  27. data/{lib → target/dependency}/metadata-extractor-2.4.0-beta-1.jar +0 -0
  28. data/target/dependency/netcdf-4.2-min.jar +0 -0
  29. data/{lib → target/dependency}/pdfbox-1.7.0.jar +0 -0
  30. data/{lib → target/dependency}/poi-3.8.jar +0 -0
  31. data/{lib → target/dependency}/poi-ooxml-3.8.jar +0 -0
  32. data/{lib → target/dependency}/poi-ooxml-schemas-3.8.jar +0 -0
  33. data/{lib → target/dependency}/poi-scratchpad-3.8.jar +0 -0
  34. data/{lib → target/dependency}/rome-0.9.jar +0 -0
  35. data/target/dependency/slf4j-api-1.5.6.jar +0 -0
  36. data/{lib → target/dependency}/tagsoup-1.2.1.jar +0 -0
  37. data/{lib → target/dependency}/tika-core-1.2.jar +0 -0
  38. data/{lib → target/dependency}/tika-parsers-1.2.jar +0 -0
  39. data/{lib → target/dependency}/vorbis-java-core-0.1-tests.jar +0 -0
  40. data/{lib → target/dependency}/vorbis-java-core-0.1.jar +0 -0
  41. data/{lib → target/dependency}/vorbis-java-tika-0.1.jar +0 -0
  42. data/{lib → target/dependency}/xmlbeans-2.3.0.jar +0 -0
  43. data/{lib → target/dependency}/xz-1.0.jar +0 -0
  44. metadata +41 -32
data/spec/fixtures/text_file.txt CHANGED
@@ -1 +1 @@
1
- First they ignore you, then they ridicule you, then they fight you, then you win.
1
+ First they ignore you, then they ridicule you, then they fight you, then you win.
data/spec/fixtures/text_file_without_extension ADDED
@@ -0,0 +1 @@
1
+ First they ignore you, then they ridicule you, then they fight you, then you win.
data/spec/rika_spec.rb CHANGED
@@ -10,26 +10,41 @@ describe Rika::Parser do
10
10
  @image_parser = Rika::Parser.new(file_path("image.jpg"))
11
11
  end
12
12
 
13
- it "should crash if file does not exists" do
13
+ it "should raise error if file does not exists" do
14
14
  lambda { Rika::Parser.new(file_path("nonsense.txt")) }.should raise_error(IOError, "File does not exist")
15
15
  end
16
16
 
17
+ it "should detect file type without a file extension" do
18
+ parser = Rika::Parser.new(file_path("text_file_without_extension"))
19
+ parser.metadata["Content-Type"].should == "text/plain; charset=ISO-8859-1"
20
+ end
21
+
17
22
  describe '#content' do
18
23
  it "should return the content in a text file" do
19
24
  @txt_parser.content.strip.should == "First they ignore you, then they ridicule you, then they fight you, then you win."
20
25
  end
21
26
 
22
27
  it "should return the content in a docx file" do
23
- @docx_parser.content.strip.should == "First they ignore you, then they ridicule you, then they fight you, then you win."
28
+ @docx_parser.content.should == "First they ignore you, then they ridicule you, then they fight you, then you win."
24
29
  end
25
30
 
26
31
  it "should return the content in a pdf file" do
27
- @pdf_parser.content.strip.should == "First they ignore you, then they ridicule you, then they fight you, then you win."
32
+ @pdf_parser.content.should == "First they ignore you, then they ridicule you, then they fight you, then you win."
28
33
  end
29
34
 
30
35
  it "should return no content for an image" do
31
36
  @image_parser.content.should be_empty
32
37
  end
38
+
39
+ it "should only return max content length" do
40
+ parser = Rika::Parser.new(file_path("text_file.txt"), 5)
41
+ parser.content.should == "First"
42
+ end
43
+
44
+ it "should be possible to read files over 100k by default" do
45
+ parser = Rika::Parser.new(file_path("over_100k_file.txt"))
46
+ parser.content.length.should == 101_761
47
+ end
33
48
  end
34
49
 
35
50
  # We just test a few of the metadata fields for some common file formats
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
Binary file
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
metadata CHANGED
@@ -2,14 +2,14 @@
2
2
  name: rika
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 0.9.1
5
+ version: 0.9.2
6
6
  platform: java
7
7
  authors:
8
8
  - Richard Nyström
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-09-16 00:00:00.000000000 Z
12
+ date: 2012-09-17 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rspec
@@ -43,46 +43,53 @@ files:
43
43
  - LICENSE.txt
44
44
  - README.md
45
45
  - Rakefile
46
- - lib/apache-mime4j-core-0.7.2.jar
47
- - lib/apache-mime4j-dom-0.7.2.jar
48
- - lib/asm-3.1.jar
49
- - lib/aspectjrt-1.6.11.jar
50
- - lib/bcmail-jdk15-1.45.jar
51
- - lib/bcprov-jdk15-1.45.jar
52
- - lib/boilerpipe-1.1.0.jar
53
- - lib/commons-codec-1.5.jar
54
- - lib/commons-compress-1.4.1.jar
55
- - lib/commons-logging-1.1.1.jar
56
- - lib/dom4j-1.6.1.jar
57
- - lib/fontbox-1.7.0.jar
58
- - lib/isoparser-1.0-RC-1.jar
59
- - lib/jempbox-1.7.0.jar
60
- - lib/juniversalchardet-1.0.3.jar
61
- - lib/metadata-extractor-2.4.0-beta-1.jar
62
- - lib/pdfbox-1.7.0.jar
63
- - lib/poi-3.8.jar
64
- - lib/poi-ooxml-3.8.jar
65
- - lib/poi-ooxml-schemas-3.8.jar
66
- - lib/poi-scratchpad-3.8.jar
67
46
  - lib/rika.rb
68
47
  - lib/rika/version.rb
69
- - lib/rome-0.9.jar
70
- - lib/tagsoup-1.2.1.jar
71
- - lib/tika-core-1.2.jar
72
- - lib/tika-parsers-1.2.jar
73
- - lib/vorbis-java-core-0.1-tests.jar
74
- - lib/vorbis-java-core-0.1.jar
75
- - lib/vorbis-java-tika-0.1.jar
76
- - lib/xmlbeans-2.3.0.jar
77
- - lib/xz-1.0.jar
48
+ - pom.xml
78
49
  - rika.gemspec
79
50
  - spec/fixtures/document.doc
80
51
  - spec/fixtures/document.docx
81
52
  - spec/fixtures/document.pdf
82
53
  - spec/fixtures/image.jpg
54
+ - spec/fixtures/over_100k_file.txt
83
55
  - spec/fixtures/text_file.txt
56
+ - spec/fixtures/text_file_without_extension
84
57
  - spec/rika_spec.rb
85
58
  - spec/spec_helper.rb
59
+ - target/dependency/apache-mime4j-core-0.7.2.jar
60
+ - target/dependency/apache-mime4j-dom-0.7.2.jar
61
+ - target/dependency/asm-3.1.jar
62
+ - target/dependency/aspectjrt-1.6.11.jar
63
+ - target/dependency/bcmail-jdk15-1.45.jar
64
+ - target/dependency/bcprov-jdk15-1.45.jar
65
+ - target/dependency/boilerpipe-1.1.0.jar
66
+ - target/dependency/commons-codec-1.5.jar
67
+ - target/dependency/commons-compress-1.4.1.jar
68
+ - target/dependency/commons-logging-1.1.1.jar
69
+ - target/dependency/dom4j-1.6.1.jar
70
+ - target/dependency/fontbox-1.7.0.jar
71
+ - target/dependency/geronimo-stax-api_1.0_spec-1.0.1.jar
72
+ - target/dependency/isoparser-1.0-RC-1.jar
73
+ - target/dependency/jdom-1.0.jar
74
+ - target/dependency/jempbox-1.7.0.jar
75
+ - target/dependency/juniversalchardet-1.0.3.jar
76
+ - target/dependency/metadata-extractor-2.4.0-beta-1.jar
77
+ - target/dependency/netcdf-4.2-min.jar
78
+ - target/dependency/pdfbox-1.7.0.jar
79
+ - target/dependency/poi-3.8.jar
80
+ - target/dependency/poi-ooxml-3.8.jar
81
+ - target/dependency/poi-ooxml-schemas-3.8.jar
82
+ - target/dependency/poi-scratchpad-3.8.jar
83
+ - target/dependency/rome-0.9.jar
84
+ - target/dependency/slf4j-api-1.5.6.jar
85
+ - target/dependency/tagsoup-1.2.1.jar
86
+ - target/dependency/tika-core-1.2.jar
87
+ - target/dependency/tika-parsers-1.2.jar
88
+ - target/dependency/vorbis-java-core-0.1-tests.jar
89
+ - target/dependency/vorbis-java-core-0.1.jar
90
+ - target/dependency/vorbis-java-tika-0.1.jar
91
+ - target/dependency/xmlbeans-2.3.0.jar
92
+ - target/dependency/xz-1.0.jar
86
93
  homepage: https://github.com/ricn/rika
87
94
  licenses: []
88
95
  post_install_message:
@@ -114,6 +121,8 @@ test_files:
114
121
  - spec/fixtures/document.docx
115
122
  - spec/fixtures/document.pdf
116
123
  - spec/fixtures/image.jpg
124
+ - spec/fixtures/over_100k_file.txt
117
125
  - spec/fixtures/text_file.txt
126
+ - spec/fixtures/text_file_without_extension
118
127
  - spec/rika_spec.rb
119
128
  - spec/spec_helper.rb