cacofonix 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (294) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +2 -0
  3. data/.rvmrc +1 -0
  4. data/.travis.yml +10 -0
  5. data/CHANGELOG.md +205 -0
  6. data/CODE_OF_CONDUCT.md +74 -0
  7. data/Gemfile +5 -0
  8. data/LICENSE.txt +22 -0
  9. data/README.md +76 -0
  10. data/Rakefile +6 -0
  11. data/TODO +14 -0
  12. data/bin/onix_extract_codelists +41 -0
  13. data/cacofonix.gemspec +39 -0
  14. data/data/9780194351898.xml +74 -0
  15. data/data/Ashgate Other.xml +382679 -0
  16. data/data/Bookwise_July_2008.xml +21611 -0
  17. data/data/Peribo Onix 0408.xml +17051 -0
  18. data/data/aau.xml +96 -0
  19. data/data/audience_range.xml +9 -0
  20. data/data/contributor.xml +6 -0
  21. data/data/control_chars.xml +74 -0
  22. data/data/entities.xml +78 -0
  23. data/data/extra_entities.xml +65 -0
  24. data/data/header.xml +21 -0
  25. data/data/header_invalid_sentdate.xml +21 -0
  26. data/data/hlasep08.xml +24519 -0
  27. data/data/imprint.xml +5 -0
  28. data/data/iso_8859_1.xml +58 -0
  29. data/data/jul.xml +24783 -0
  30. data/data/language.xml +7 -0
  31. data/data/market_representation.xml +11 -0
  32. data/data/measure.xml +6 -0
  33. data/data/media_file.xml +6 -0
  34. data/data/no_encoding.xml +78 -0
  35. data/data/no_xml_declaration.xml +95 -0
  36. data/data/other_text.xml +5 -0
  37. data/data/price.xml +5 -0
  38. data/data/product.xml +79 -0
  39. data/data/product_identifier.xml +5 -0
  40. data/data/product_invalid_pubdate.xml +42 -0
  41. data/data/publisher.xml +5 -0
  42. data/data/rba_FANT.xml +23 -0
  43. data/data/reference_with_release_attrib.xml +74 -0
  44. data/data/sales_restriction.xml +4 -0
  45. data/data/sales_rights.xml +35 -0
  46. data/data/sender_identifier.xml +5 -0
  47. data/data/series.xml +5 -0
  48. data/data/series_identifier.xml +6 -0
  49. data/data/short_tags.xml +191 -0
  50. data/data/short_tags_ivp.xml +231 -0
  51. data/data/sl_product.xml +81 -0
  52. data/data/stock.xml +5 -0
  53. data/data/subject.xml +6 -0
  54. data/data/supply_detail.xml +18 -0
  55. data/data/title.xml +6 -0
  56. data/data/two_products.xml +114 -0
  57. data/data/usd.xml +175 -0
  58. data/data/utf_16.xml +0 -0
  59. data/data/website.xml +6 -0
  60. data/examples/reader.rb +25 -0
  61. data/examples/reader_apa.rb +22 -0
  62. data/examples/writer.rb +13 -0
  63. data/examples/writer_apa.rb +38 -0
  64. data/lib/cacofonix.rb +151 -0
  65. data/lib/cacofonix/codelists/001.rb +16 -0
  66. data/lib/cacofonix/codelists/002.rb +11 -0
  67. data/lib/cacofonix/codelists/003.rb +13 -0
  68. data/lib/cacofonix/codelists/005.rb +20 -0
  69. data/lib/cacofonix/codelists/006.rb +82 -0
  70. data/lib/cacofonix/codelists/007.rb +129 -0
  71. data/lib/cacofonix/codelists/008.rb +13 -0
  72. data/lib/cacofonix/codelists/009.rb +13 -0
  73. data/lib/cacofonix/codelists/010.rb +52 -0
  74. data/lib/cacofonix/codelists/011.rb +17 -0
  75. data/lib/cacofonix/codelists/012.rb +17 -0
  76. data/lib/cacofonix/codelists/013.rb +13 -0
  77. data/lib/cacofonix/codelists/014.rb +10 -0
  78. data/lib/cacofonix/codelists/015.rb +19 -0
  79. data/lib/cacofonix/codelists/016.rb +12 -0
  80. data/lib/cacofonix/codelists/017.rb +96 -0
  81. data/lib/cacofonix/codelists/018.rb +11 -0
  82. data/lib/cacofonix/codelists/019.rb +13 -0
  83. data/lib/cacofonix/codelists/020.rb +7 -0
  84. data/lib/cacofonix/codelists/021.rb +39 -0
  85. data/lib/cacofonix/codelists/022.rb +15 -0
  86. data/lib/cacofonix/codelists/023.rb +19 -0
  87. data/lib/cacofonix/codelists/024.rb +17 -0
  88. data/lib/cacofonix/codelists/025.rb +35 -0
  89. data/lib/cacofonix/codelists/026.rb +86 -0
  90. data/lib/cacofonix/codelists/027.rb +90 -0
  91. data/lib/cacofonix/codelists/028.rb +14 -0
  92. data/lib/cacofonix/codelists/029.rb +25 -0
  93. data/lib/cacofonix/codelists/030.rb +22 -0
  94. data/lib/cacofonix/codelists/031.rb +9 -0
  95. data/lib/cacofonix/codelists/032.rb +8 -0
  96. data/lib/cacofonix/codelists/033.rb +43 -0
  97. data/lib/cacofonix/codelists/034.rb +21 -0
  98. data/lib/cacofonix/codelists/035.rb +12 -0
  99. data/lib/cacofonix/codelists/036.rb +9 -0
  100. data/lib/cacofonix/codelists/037.rb +12 -0
  101. data/lib/cacofonix/codelists/038.rb +43 -0
  102. data/lib/cacofonix/codelists/039.rb +14 -0
  103. data/lib/cacofonix/codelists/040.rb +12 -0
  104. data/lib/cacofonix/codelists/041.rb +12 -0
  105. data/lib/cacofonix/codelists/042.rb +22 -0
  106. data/lib/cacofonix/codelists/043.rb +12 -0
  107. data/lib/cacofonix/codelists/044.rb +22 -0
  108. data/lib/cacofonix/codelists/045.rb +19 -0
  109. data/lib/cacofonix/codelists/046.rb +15 -0
  110. data/lib/cacofonix/codelists/047.rb +10 -0
  111. data/lib/cacofonix/codelists/048.rb +17 -0
  112. data/lib/cacofonix/codelists/049.rb +91 -0
  113. data/lib/cacofonix/codelists/050.rb +14 -0
  114. data/lib/cacofonix/codelists/051.rb +36 -0
  115. data/lib/cacofonix/codelists/052.rb +7 -0
  116. data/lib/cacofonix/codelists/053.rb +10 -0
  117. data/lib/cacofonix/codelists/054.rb +29 -0
  118. data/lib/cacofonix/codelists/055.rb +23 -0
  119. data/lib/cacofonix/codelists/056.rb +8 -0
  120. data/lib/cacofonix/codelists/057.rb +11 -0
  121. data/lib/cacofonix/codelists/058.rb +29 -0
  122. data/lib/cacofonix/codelists/059.rb +13 -0
  123. data/lib/cacofonix/codelists/060.rb +8 -0
  124. data/lib/cacofonix/codelists/061.rb +9 -0
  125. data/lib/cacofonix/codelists/062.rb +11 -0
  126. data/lib/cacofonix/codelists/063.rb +7 -0
  127. data/lib/cacofonix/codelists/064.rb +19 -0
  128. data/lib/cacofonix/codelists/065.rb +34 -0
  129. data/lib/cacofonix/codelists/066.rb +10 -0
  130. data/lib/cacofonix/codelists/067.rb +8 -0
  131. data/lib/cacofonix/codelists/068.rb +22 -0
  132. data/lib/cacofonix/codelists/069.rb +10 -0
  133. data/lib/cacofonix/codelists/070.rb +8 -0
  134. data/lib/cacofonix/codelists/071.rb +16 -0
  135. data/lib/cacofonix/codelists/072.rb +13 -0
  136. data/lib/cacofonix/codelists/073.rb +42 -0
  137. data/lib/cacofonix/codelists/074.rb +495 -0
  138. data/lib/cacofonix/codelists/075.rb +8 -0
  139. data/lib/cacofonix/codelists/076.rb +18 -0
  140. data/lib/cacofonix/codelists/077.rb +25 -0
  141. data/lib/cacofonix/codelists/078.rb +173 -0
  142. data/lib/cacofonix/codelists/079.rb +25 -0
  143. data/lib/cacofonix/codelists/080.rb +26 -0
  144. data/lib/cacofonix/codelists/081.rb +45 -0
  145. data/lib/cacofonix/codelists/082.rb +23 -0
  146. data/lib/cacofonix/codelists/083.rb +74 -0
  147. data/lib/cacofonix/codelists/084.rb +16 -0
  148. data/lib/cacofonix/codelists/085.rb +29 -0
  149. data/lib/cacofonix/codelists/086.rb +11 -0
  150. data/lib/cacofonix/codelists/087.rb +12 -0
  151. data/lib/cacofonix/codelists/088.rb +7 -0
  152. data/lib/cacofonix/codelists/089.rb +7 -0
  153. data/lib/cacofonix/codelists/090.rb +17 -0
  154. data/lib/cacofonix/codelists/091.rb +257 -0
  155. data/lib/cacofonix/codelists/092.rb +14 -0
  156. data/lib/cacofonix/codelists/093.rb +19 -0
  157. data/lib/cacofonix/codelists/094.rb +9 -0
  158. data/lib/cacofonix/codelists/095.rb +9 -0
  159. data/lib/cacofonix/codelists/096.rb +191 -0
  160. data/lib/cacofonix/codelists/097.rb +7 -0
  161. data/lib/cacofonix/codelists/098.rb +27 -0
  162. data/lib/cacofonix/codelists/099.rb +30 -0
  163. data/lib/cacofonix/codelists/100.rb +12 -0
  164. data/lib/cacofonix/codelists/101.rb +8 -0
  165. data/lib/cacofonix/codelists/102.rb +9 -0
  166. data/lib/cacofonix/codelists/121.rb +164 -0
  167. data/lib/cacofonix/codelists/138.rb +12 -0
  168. data/lib/cacofonix/codelists/139.rb +76 -0
  169. data/lib/cacofonix/codelists/140.rb +13 -0
  170. data/lib/cacofonix/codelists/141.rb +16 -0
  171. data/lib/cacofonix/codelists/142.rb +18 -0
  172. data/lib/cacofonix/codelists/143.rb +13 -0
  173. data/lib/cacofonix/codelists/144.rb +12 -0
  174. data/lib/cacofonix/codelists/145.rb +13 -0
  175. data/lib/cacofonix/codelists/146.rb +9 -0
  176. data/lib/cacofonix/codelists/147.rb +16 -0
  177. data/lib/cacofonix/codelists/148.rb +9 -0
  178. data/lib/cacofonix/codelists/149.rb +10 -0
  179. data/lib/cacofonix/codelists/150.rb +130 -0
  180. data/lib/cacofonix/codelists/151.rb +13 -0
  181. data/lib/cacofonix/codelists/152.rb +8 -0
  182. data/lib/cacofonix/codelists/153.rb +20 -0
  183. data/lib/cacofonix/codelists/154.rb +15 -0
  184. data/lib/cacofonix/codelists/155.rb +11 -0
  185. data/lib/cacofonix/codelists/156.rb +10 -0
  186. data/lib/cacofonix/codelists/157.rb +10 -0
  187. data/lib/cacofonix/codelists/158.rb +32 -0
  188. data/lib/cacofonix/codelists/159.rb +12 -0
  189. data/lib/cacofonix/codelists/160.rb +10 -0
  190. data/lib/cacofonix/codelists/161.rb +9 -0
  191. data/lib/cacofonix/codelists/162.rb +12 -0
  192. data/lib/cacofonix/codelists/163.rb +17 -0
  193. data/lib/cacofonix/codelists/164.rb +11 -0
  194. data/lib/cacofonix/codelists/165.rb +9 -0
  195. data/lib/cacofonix/codelists/166.rb +9 -0
  196. data/lib/cacofonix/codelists/167.rb +9 -0
  197. data/lib/cacofonix/codelists/168.rb +8 -0
  198. data/lib/cacofonix/codelists/169.rb +9 -0
  199. data/lib/cacofonix/codelists/170.rb +7 -0
  200. data/lib/cacofonix/codelists/171.rb +8 -0
  201. data/lib/cacofonix/codelists/172.rb +7 -0
  202. data/lib/cacofonix/codelists/173.rb +8 -0
  203. data/lib/cacofonix/codelists/174.rb +8 -0
  204. data/lib/cacofonix/codelists/175.rb +206 -0
  205. data/lib/cacofonix/codelists/176.rb +20 -0
  206. data/lib/cacofonix/codelists/177.rb +8 -0
  207. data/lib/cacofonix/codelists/178.rb +24 -0
  208. data/lib/cacofonix/codelists/179.rb +7 -0
  209. data/lib/cacofonix/codelists/184.rb +11 -0
  210. data/lib/cacofonix/core/code.rb +106 -0
  211. data/lib/cacofonix/core/element.rb +275 -0
  212. data/lib/cacofonix/core/header.rb +45 -0
  213. data/lib/cacofonix/core/lists.rb +108 -0
  214. data/lib/cacofonix/core/reader.rb +166 -0
  215. data/lib/cacofonix/core/writer.rb +123 -0
  216. data/lib/cacofonix/elements/addressee_identifier.rb +6 -0
  217. data/lib/cacofonix/elements/agent_identifier.rb +6 -0
  218. data/lib/cacofonix/elements/audience.rb +8 -0
  219. data/lib/cacofonix/elements/audience_range.rb +15 -0
  220. data/lib/cacofonix/elements/batch_bonus.rb +7 -0
  221. data/lib/cacofonix/elements/bible.rb +12 -0
  222. data/lib/cacofonix/elements/complexity.rb +7 -0
  223. data/lib/cacofonix/elements/conference.rb +14 -0
  224. data/lib/cacofonix/elements/conference_sponsor.rb +8 -0
  225. data/lib/cacofonix/elements/conference_sponsor_identifier.rb +6 -0
  226. data/lib/cacofonix/elements/contained_item.rb +6 -0
  227. data/lib/cacofonix/elements/content_item.rb +21 -0
  228. data/lib/cacofonix/elements/contributor.rb +19 -0
  229. data/lib/cacofonix/elements/copyright_owner.rb +8 -0
  230. data/lib/cacofonix/elements/copyright_owner_identifier.rb +6 -0
  231. data/lib/cacofonix/elements/copyright_statement.rb +7 -0
  232. data/lib/cacofonix/elements/discount_coded.rb +8 -0
  233. data/lib/cacofonix/elements/extent.rb +8 -0
  234. data/lib/cacofonix/elements/identifier.rb +7 -0
  235. data/lib/cacofonix/elements/illustrations.rb +8 -0
  236. data/lib/cacofonix/elements/imprint.rb +9 -0
  237. data/lib/cacofonix/elements/language.rb +8 -0
  238. data/lib/cacofonix/elements/location_identifier.rb +8 -0
  239. data/lib/cacofonix/elements/main_subject.rb +9 -0
  240. data/lib/cacofonix/elements/market_date.rb +8 -0
  241. data/lib/cacofonix/elements/market_representation.rb +18 -0
  242. data/lib/cacofonix/elements/measure.rb +8 -0
  243. data/lib/cacofonix/elements/media_file.rb +16 -0
  244. data/lib/cacofonix/elements/name.rb +6 -0
  245. data/lib/cacofonix/elements/name_base.rb +17 -0
  246. data/lib/cacofonix/elements/new_supplier.rb +12 -0
  247. data/lib/cacofonix/elements/not_for_sale.rb +13 -0
  248. data/lib/cacofonix/elements/on_order_detail.rb +7 -0
  249. data/lib/cacofonix/elements/other_text.rb +17 -0
  250. data/lib/cacofonix/elements/page_run.rb +7 -0
  251. data/lib/cacofonix/elements/person_date.rb +8 -0
  252. data/lib/cacofonix/elements/person_name_identifier.rb +6 -0
  253. data/lib/cacofonix/elements/price.rb +46 -0
  254. data/lib/cacofonix/elements/prize.rb +10 -0
  255. data/lib/cacofonix/elements/product.rb +155 -0
  256. data/lib/cacofonix/elements/product_base.rb +61 -0
  257. data/lib/cacofonix/elements/product_classification.rb +8 -0
  258. data/lib/cacofonix/elements/product_form_feature.rb +8 -0
  259. data/lib/cacofonix/elements/product_identifier.rb +6 -0
  260. data/lib/cacofonix/elements/professional_affiliation.rb +7 -0
  261. data/lib/cacofonix/elements/publisher.rb +11 -0
  262. data/lib/cacofonix/elements/reissue.rb +9 -0
  263. data/lib/cacofonix/elements/related_product.rb +6 -0
  264. data/lib/cacofonix/elements/religious_text.rb +8 -0
  265. data/lib/cacofonix/elements/religious_text_feature.rb +8 -0
  266. data/lib/cacofonix/elements/sales_outlet.rb +7 -0
  267. data/lib/cacofonix/elements/sales_outlet_identifier.rb +6 -0
  268. data/lib/cacofonix/elements/sales_restriction.rb +7 -0
  269. data/lib/cacofonix/elements/sales_rights.rb +13 -0
  270. data/lib/cacofonix/elements/sender_identifier.rb +6 -0
  271. data/lib/cacofonix/elements/series.rb +11 -0
  272. data/lib/cacofonix/elements/series_identifier.rb +6 -0
  273. data/lib/cacofonix/elements/set.rb +13 -0
  274. data/lib/cacofonix/elements/stock.rb +16 -0
  275. data/lib/cacofonix/elements/stock_quantity_coded.rb +8 -0
  276. data/lib/cacofonix/elements/subject.rb +13 -0
  277. data/lib/cacofonix/elements/supplier_identifier.rb +6 -0
  278. data/lib/cacofonix/elements/supply_detail.rb +37 -0
  279. data/lib/cacofonix/elements/text_item.rb +11 -0
  280. data/lib/cacofonix/elements/text_item_identifier.rb +6 -0
  281. data/lib/cacofonix/elements/title.rb +11 -0
  282. data/lib/cacofonix/elements/website.rb +8 -0
  283. data/lib/cacofonix/elements/work_identifier.rb +6 -0
  284. data/lib/cacofonix/utils/code_list_extractor.rb +112 -0
  285. data/lib/cacofonix/utils/normaliser.rb +125 -0
  286. data/lib/cacofonix/version.rb +5 -0
  287. data/lib/cacofonix/wrappers/apa_product.rb +748 -0
  288. data/lib/cacofonix/wrappers/simple_product.rb +49 -0
  289. data/support/entities.txt +1499 -0
  290. data/support/extract.rb +25 -0
  291. data/support/switch-onix-2.1-short-to-reference.xsl +24 -0
  292. data/support/switch-onix-tagnames-1.1.xsl +25 -0
  293. data/support/switch-onix-tagnames-2.0.xsl +37 -0
  294. metadata +438 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 7d71358bce0d5d281f6d8ea181ea0a8b50ea74acd5b5df2e6079d2ab4d98a6fe
4
+ data.tar.gz: b1802248185109acad0b4cf4951fc701bbb841fc38dc3935da5e519e5db70148
5
+ SHA512:
6
+ metadata.gz: 335a075c708ad440c71554dc03ef92201dd797dad92ef9d6ac29f729ebdfc4ca46022b6111a325834acecc69d169645b2f512c86a7b1cd9f465b15ab98a32eb4
7
+ data.tar.gz: aae5a765f88eaa478b7bce6c9590f4900922f778d298ab8de47c605f8225cd74cf7ddfd6f1e29dcc6ca1ddb5719dd3351ddbcda3cd52dd2e011da67f2670f644
@@ -0,0 +1,2 @@
1
+ pkg
2
+ Gemfile.lock
data/.rvmrc ADDED
@@ -0,0 +1 @@
1
+ rvm use ree@onix
@@ -0,0 +1,10 @@
1
+ ---
2
+ sudo: false
3
+ language: ruby
4
+ cache: bundler
5
+ rvm:
6
+ - 2.6.3
7
+ - 2.5.5
8
+ before_install:
9
+ - sudo apt-get install -y xsltproc
10
+ - gem install bundler -v 2.0.2
@@ -0,0 +1,205 @@
1
+ # Changelog
2
+
3
+ ## [0.10.0] - 2019-07-08
4
+
5
+ ### Changed
6
+
7
+ - Rename gem from onix to cacofonix
8
+ - Rename primary module namespace from `ONIX` to `Cacofonix`
9
+ - Use Cacofonix::DTDs to set appropriate env vars before XML parsing; required for ONIX 2.1 DTDs to be properly loaded and related entities in source XML files to be respected
10
+ - Address Fixnum->Integer and BigDecimal.new->BigDecimal deprecations
11
+ - Require Ruby 2.5 or newer
12
+
13
+ # Previous releases (as onix gem)
14
+
15
+ v0.9.1 (5th September 2011)
16
+ - relax activesupport dependency to work with rails 3 or 3.1
17
+
18
+ v0.9.0 (14th April 2011)
19
+ - switch back to the vanilla roxml gem. Ben is maintaining it again and
20
+ he has merged in my bug fixes
21
+ - clarify comments explaining encoding behaviour
22
+ - Add options hash to ONIX::Reader. Only option at this stage is :encoding,
23
+ which allows the user to override the assumed encoding of the input XML
24
+ - API change, so new minor version
25
+
26
+ v0.8.5 (21st December 2010)
27
+ - update packaging - use bundler and rspec 2.x
28
+ - support normalising short tag files that include HTML tags
29
+
30
+ v0.8.4 (18th October 2010)
31
+ - some small fixes to xml names from Tim
32
+ - make all code lists available via the ONIX::Lists class
33
+
34
+ v0.8.3 (9th September 2010)
35
+ - Fix for race condition in ONIX::Normaliser
36
+ - thanks to pixelvixen for reporting
37
+ - force roxml to be 3.1.6 or higher. Earlier versions misbehaved when monkey
38
+ patching nokogiri
39
+
40
+ v0.8.2 (6th May 2010)
41
+ - fix APAProduct#series and APAProduct#series=
42
+
43
+ v0.8.1 (5th January 2010)
44
+ - Use nokogiri's support for transparent entity conversion when reading an ONIX file
45
+ - Removed entity replacement from ONIX::Normaliser
46
+ - the external dependency on sed made me uncomfortable, and it wasn't really
47
+ necessary now that nokogiri can do it for us
48
+ - Removed utf-8 normalisation from ONIX::Normaliser
49
+ - nokogiri also handles this really cleanly and transparently. Regardless of
50
+ the source file encoding, Nokogiri::Reader returns utf-8 encoded data
51
+ - Add the release attribute to files we generate
52
+ - it's optional in 2.1, but mandatory in 3.0. As we start to see 3.0 files in the
53
+ wild it will help to have a rapid way to distinguish between them
54
+ - Add ONIX::Reader#release - to detect the release version of files we read in
55
+
56
+ v0.8.0 (31st October 2009)
57
+ - Replace LibXML dependency with Nokogiri. Nokogiri is under active development, has
58
+ a responsive maintainer and is significantly more stable
59
+ - Switch to ROXML 3.x
60
+ - roxml also switched from libxml to nokogiri
61
+ - roxml removed deprecated parts of its API
62
+ - should now avoid various conflicts with mongrel
63
+ - Ensure APAProduct#price returns the first product price and ignores
64
+ the price type
65
+
66
+ v0.7.8 (19th October 2009)
67
+ - add support for additional elements (mostly series and audience related)
68
+ - thanks tim
69
+
70
+ v0.7.7 (1st October 2009)
71
+ - optimise sed usage in ONIX::Normaliser. *huge* speed improvement on
72
+ large files.
73
+
74
+ v0.7.6 (21st September 2009)
75
+ - provide access to the PackQuantity element
76
+
77
+ v0.7.5 (8th September 2009)
78
+ - Don't raise an exception on malformed dates when reading files
79
+
80
+ v0.7.4 (2nd September 2009)
81
+ - Expand ONIX::Normaliser
82
+ - strip control chars
83
+ - add encoding declaration to valid utf-8 files that aren't declared
84
+ as such
85
+
86
+ v0.7.3 (19th August 2009)
87
+ - Switch from java to xsltproc to convert short tag ONIX files
88
+ to reference tags
89
+
90
+ v0.7.2 (19th August 2009)
91
+ - Added ONIX::Normaliser class
92
+ - for normalising various ONIX files into a form that makes them easy
93
+ to process. Shouldn't be necessary to pre-process files like this, but
94
+ I'm sick of trying to wrestle the libxml ruby bindings
95
+
96
+ v0.7.1 (24th June 2009)
97
+ - Small tweak to ordering of elements in the Product group
98
+
99
+ v0.7.0 (17th June 2009)
100
+ - try using LibXML for reader again
101
+ - retrieving the ONIX version of the input file is currently disabled, as
102
+ that seems to be the source of our instability
103
+ - Various Ruby 1.9 compatibility tweaks
104
+ - add source file coding declarations. All source files are UTF-8
105
+ - ONIX::Reader ensures all input data is converted to UTF-8
106
+ - the ROXML based objects seem to forget the encoding when they're marshalled,
107
+ so force string based attributes *back* to UTF-8
108
+
109
+ v0.6.7 (Unreleased)
110
+ - add some accessors to the Title composite
111
+
112
+ v0.6.6 (Unreleased)
113
+ - Forget the S on an element name
114
+
115
+ v0.6.5 (Unreleased)
116
+ - Ruby 1.9 compat
117
+
118
+ v0.6.4 (Unreleased)
119
+ - Add APAProduct#price
120
+
121
+ v0.6.3 (Unreleased)
122
+ - Bump ROXML dependency to 2.5.3 to get libxml-ruby 1.1.3 compatibility
123
+
124
+ v0.6.2 (Unreleased)
125
+ - Fix a small typo in APAProduct
126
+
127
+ v0.6.1 (Unreleased)
128
+ - Stopped using LibXML's Reader class as the basis for our reader.
129
+ - We were getting too many segfaults (even 1 is too many!)
130
+ - until we resolve it, reverted to manual string parsing
131
+ - This is a fairly major regression of functionality. For 99% of files
132
+ it won't matter, but for some corner cases it will, i.e. UTF-16 encoded
133
+ files
134
+ - Will also be noticeably slower
135
+ - Hopefully only a short term fix, until I work out what is going on with
136
+ libxml
137
+
138
+ v0.6.0 (18th March 2009)
139
+ - remove use of threads in ONIX::Reader
140
+ - a producer/consumer pattern was useful in the REXML stream parsing days, but
141
+ now LibXML's Reader binding provides a better alternative
142
+ - API left unchanged, this was all under the hood
143
+ - bump required ROXML version to 2.5.2
144
+
145
+ v0.5.1 (4th March 2009)
146
+ - Fix a single letter typo
147
+
148
+ v0.5 (2nd March 2009)
149
+ - Switch ROXML dependency from a patched version to vanilla
150
+ - Vanilla ROXML now has all the features we need
151
+ - This change should be transparent to ONIX gem users
152
+
153
+ v0.4.7 (9th December 2008)
154
+ - Contributor sub elements should match the order specified in the DTD
155
+
156
+ v0.4.6 (2nd December 2008)
157
+ - 2 new accessors on the contributor class - bio and corporate name
158
+
159
+ v0.4.5 (21st November 2008)
160
+ - APAProduct wrapper should generate valid MediaFile composites
161
+
162
+ v0.4.4 (19th November 2008)
163
+ - Added support for more elements from MarketRepresentation
164
+
165
+ v0.4.3 (11th November 2008)
166
+ - Added support for AgentName and MarketPublishingStatus
167
+
168
+ v0.4.2 (1st November 2008)
169
+ - Remove final remnants of REXML code
170
+ - Minor reordering of elements to match DTD
171
+
172
+ v0.4.1 (UNRELEASED)
173
+ - Added accessors to various product measurements. Height, weight, etc.
174
+ - Reduced time for an ONIX::Reader class to initialise
175
+
176
+ v0.4.0 (28th October 2008)
177
+ - Major rework: now based on ROXML instead of xml-mapping
178
+ - Mostly API Compatible
179
+ - StreamReader and StreamWriter renamed to Reader and Writer
180
+ - ROXML is based on libxml, so things should be significantly faster
181
+
182
+ v0.2.7 (Unreleased)
183
+ - Add line breaks after each product
184
+
185
+ v0.2.5 (Unreleased)
186
+ - Make PublishingStatus a two_digit_node
187
+
188
+ v0.2.4 (Unreleased)
189
+ - Initialise the media files array of a new product correctly
190
+
191
+ v0.2.3 (Unreleased)
192
+ - Switch a few more fields over to TwoDigitNodes
193
+ - Make the product availability field accessible from APAProduct
194
+
195
+ v0.2.2 (Unreleased)
196
+ - Add a new nodetype (DateNode) for YYYYMMDD fields
197
+
198
+ v0.2.1 (Unreleased)
199
+ - Add a new nodetype (TwoDigitNode) for two digit codes
200
+
201
+ v0.2.0 (16th July 2008)
202
+ - Add support for reading and storing subject codes (BIC, BISAC, etc)
203
+
204
+ v0.1.0 (12th June 2008)
205
+ - Initial Release
@@ -0,0 +1,74 @@
1
+ # Contributor Covenant Code of Conduct
2
+
3
+ ## Our Pledge
4
+
5
+ In the interest of fostering an open and welcoming environment, we as
6
+ contributors and maintainers pledge to making participation in our project and
7
+ our community a harassment-free experience for everyone, regardless of age, body
8
+ size, disability, ethnicity, gender identity and expression, level of experience,
9
+ nationality, personal appearance, race, religion, or sexual identity and
10
+ orientation.
11
+
12
+ ## Our Standards
13
+
14
+ Examples of behavior that contributes to creating a positive environment
15
+ include:
16
+
17
+ * Using welcoming and inclusive language
18
+ * Being respectful of differing viewpoints and experiences
19
+ * Gracefully accepting constructive criticism
20
+ * Focusing on what is best for the community
21
+ * Showing empathy towards other community members
22
+
23
+ Examples of unacceptable behavior by participants include:
24
+
25
+ * The use of sexualized language or imagery and unwelcome sexual attention or
26
+ advances
27
+ * Trolling, insulting/derogatory comments, and personal or political attacks
28
+ * Public or private harassment
29
+ * Publishing others' private information, such as a physical or electronic
30
+ address, without explicit permission
31
+ * Other conduct which could reasonably be considered inappropriate in a
32
+ professional setting
33
+
34
+ ## Our Responsibilities
35
+
36
+ Project maintainers are responsible for clarifying the standards of acceptable
37
+ behavior and are expected to take appropriate and fair corrective action in
38
+ response to any instances of unacceptable behavior.
39
+
40
+ Project maintainers have the right and responsibility to remove, edit, or
41
+ reject comments, commits, code, wiki edits, issues, and other contributions
42
+ that are not aligned to this Code of Conduct, or to ban temporarily or
43
+ permanently any contributor for other behaviors that they deem inappropriate,
44
+ threatening, offensive, or harmful.
45
+
46
+ ## Scope
47
+
48
+ This Code of Conduct applies both within project spaces and in public spaces
49
+ when an individual is representing the project or its community. Examples of
50
+ representing a project or community include using an official project e-mail
51
+ address, posting via an official social media account, or acting as an appointed
52
+ representative at an online or offline event. Representation of a project may be
53
+ further defined and clarified by project maintainers.
54
+
55
+ ## Enforcement
56
+
57
+ Instances of abusive, harassing, or otherwise unacceptable behavior may be
58
+ reported by contacting the project team at tim(at)icelab.com.au. All
59
+ complaints will be reviewed and investigated and will result in a response that
60
+ is deemed necessary and appropriate to the circumstances. The project team is
61
+ obligated to maintain confidentiality with regard to the reporter of an incident.
62
+ Further details of specific enforcement policies may be posted separately.
63
+
64
+ Project maintainers who do not follow or enforce the Code of Conduct in good
65
+ faith may face temporary or permanent repercussions as determined by other
66
+ members of the project's leadership.
67
+
68
+ ## Attribution
69
+
70
+ This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
71
+ available at [http://contributor-covenant.org/version/1/4][version]
72
+
73
+ [homepage]: http://contributor-covenant.org
74
+ [version]: http://contributor-covenant.org/version/1/4/
data/Gemfile ADDED
@@ -0,0 +1,5 @@
1
+ source "https://rubygems.org"
2
+
3
+ gemspec
4
+
5
+ gem "pry-byebug", platform: :mri
@@ -0,0 +1,22 @@
1
+ Copyright (c)
2
+ 2008-2011 James Healy
3
+ 2011-2012 Joseph Pearson
4
+ 2019 Icelab
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in
14
+ all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
+ THE SOFTWARE.
@@ -0,0 +1,76 @@
1
+ # Cacofonix
2
+
3
+ Cacofonix helps you work with ONIX data in your Ruby applications. ONIX is a set
4
+ of (XML-based) standards for published products. Cacofonix focuses on the _[ONIX
5
+ for Books][onix_standard]_ standard, and currently supports ONIX 2.1 (all
6
+ revisions).
7
+
8
+ With Cacofonix, you can:
9
+
10
+ - Efficiently read ONIX files
11
+ - Map records in ONIX files to Ruby objects
12
+ - Write ONIX files
13
+
14
+ Cacofonix is an updated, maintained fork of James Healy's original [onix
15
+ gem][gem_original], also incorporating Joseph Pearson's [major
16
+ refactorings][gem_joseph] and expanded support for the standard.
17
+
18
+ [onix_standard]: https://www.editeur.org/11/Books/
19
+ [gem_original]: https://github.com/yob/onix
20
+ [gem_joseph]: https://github.com/yob/onix/pull/3
21
+
22
+ ## Installation
23
+
24
+ Add this line to your application's `Gemfile`:
25
+
26
+ ```ruby
27
+ gem "cacofonix"
28
+ ```
29
+
30
+ And then execute:
31
+
32
+ ```shell
33
+ $ bundle
34
+ ```
35
+
36
+ Or install it yourself as:
37
+
38
+ ```shell
39
+ $ gem install cacofonix
40
+ ```
41
+
42
+ ## Usage
43
+
44
+ See the files in the [examples/][examples] directory to get started quickly.
45
+
46
+ For more detail, view the comments in the following classes:
47
+
48
+ * [`Cacofonix::Reader`][reader] - For reading ONIX files
49
+ * [`Cacofonix::Writer`][writer] - For writing ONIX files
50
+ * [`Cacofonix::Normaliser`][normaliser] - For normalising ONIX files before
51
+ reading (Fixes encoding issues, etc.)
52
+ * [`Cacofonix::Lists`][lists] - For building hashes of code lists from the ONIX
53
+ spec
54
+
55
+ [examples]: examples
56
+ [reader]: lib/cacofonix/core/reader.rb
57
+ [writer]: lib/cacofonix/core/writer.rb
58
+ [normaliser]: lib/cacofonix/utils/normaliser.rb
59
+ [lists]: lib/cacofonix/core/lists.rb
60
+
61
+ ## ONIX support
62
+
63
+ `Cacofonix::Reader` only handles the reference tag versions of ONIX 2.1. Use
64
+ `Cacofonix::Normaliser` to convert any short tag files to reference tags.
65
+
66
+ `Cacofonix::Writer` only generates reference tag ONIX files.
67
+
68
+ ## License
69
+
70
+ The gem is available as open source under the terms of the [MIT
71
+ License](https://opensource.org/licenses/MIT).
72
+
73
+ ## Code of Conduct
74
+
75
+ Everyone interacting in the Cacofonix codebase, issue trackers, chat rooms and
76
+ mailing lists is expected to follow the [code of conduct](CODE_OF_CONDUCT.md).
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
data/TODO ADDED
@@ -0,0 +1,14 @@
1
+ - finish adding support for all tags in a Product record
2
+ - contributor still needs some obscure tags added
3
+ - add a new node type for dates that should be in the form YYYYMMDD
4
+ - allow symbols to be used instead of numeric codes
5
+ - contribution type, id type, etc
6
+ - validations
7
+ - hard stuff from the spec
8
+ - 2 digit numbers, 8 digit dates, char limits on text fields
9
+ - code lists
10
+ - only values in code lists allowed in relevant fields
11
+ - regional requirements
12
+ - titlepage minimum set of tags
13
+ - APA (if different to titlepage)
14
+ - documentation
@@ -0,0 +1,41 @@
1
+ #!/usr/bin/ruby
2
+ # coding: utf-8
3
+
4
+ # NB: When testing locally, you can run this as:
5
+ #
6
+ # ruby -Ilib bin/onix_extract_codelists [options] xsd dir
7
+
8
+ require 'optparse'
9
+
10
+ def parse_options
11
+ options = {}
12
+ optparse = OptionParser.new do |parser|
13
+ parser.banner = [
14
+ "Usage: onix_extract_codelists [options] xsd_path output_dir_path",
15
+ "",
16
+ " eg: onix_extract_codelists ONIX_BookProduct_CodeLists.xsd support/codes"
17
+ ].join("\n")
18
+
19
+ options[:format] = :tsv
20
+ parser.on("-f", "--format FMT", "Write to format (TSV or Ruby)") do |fmt|
21
+ options[:format] = fmt.downcase.to_sym
22
+ end
23
+
24
+ parser.on("-h", "--help", "Display this usage information") do
25
+ puts parser
26
+ exit
27
+ end
28
+ end
29
+ optparse.parse!
30
+ unless ARGV.size == 2
31
+ puts optparse
32
+ exit(1)
33
+ end
34
+ options
35
+ end
36
+
37
+ options = parse_options
38
+
39
+ require "cacofonix"
40
+ extractor = Cacofonix::CodeListExtractor.new(ARGV.shift, options[:format])
41
+ extractor.run(ARGV.shift)