bio-ucsc-api 0.3.1 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (547) hide show
  1. data/Gemfile +4 -3
  2. data/Gemfile.lock +31 -20
  3. data/README.md +224 -0
  4. data/Rakefile +5 -8
  5. data/VERSION +1 -1
  6. data/bio-ucsc-api.gemspec +39 -122
  7. data/lib/bio-ucsc.rb +10 -5
  8. data/lib/bio-ucsc/ailmel1.rb +5 -3
  9. data/lib/bio-ucsc/anocar2.rb +7 -6
  10. data/lib/bio-ucsc/anogam1.rb +7 -5
  11. data/lib/bio-ucsc/anogam1/chaindm3.rb +11 -8
  12. data/lib/bio-ucsc/anogam1/chaindm3link.rb +11 -8
  13. data/lib/bio-ucsc/anogam1/est.rb +11 -8
  14. data/lib/bio-ucsc/anogam1/gap.rb +11 -8
  15. data/lib/bio-ucsc/anogam1/gold.rb +11 -8
  16. data/lib/bio-ucsc/anogam1/intronest.rb +11 -8
  17. data/lib/bio-ucsc/anogam1/mrna.rb +11 -8
  18. data/lib/bio-ucsc/anogam1/rmsk.rb +10 -7
  19. data/lib/bio-ucsc/apimel2.rb +5 -3
  20. data/lib/bio-ucsc/apimel2/chaindm2.rb +11 -8
  21. data/lib/bio-ucsc/apimel2/chaindm2link.rb +11 -8
  22. data/lib/bio-ucsc/aplcal1.rb +5 -3
  23. data/lib/bio-ucsc/aplcal1/chaincaepb1.rb +11 -8
  24. data/lib/bio-ucsc/aplcal1/chaincaepb1link.rb +11 -8
  25. data/lib/bio-ucsc/aplcal1/chaincaerem2.rb +11 -8
  26. data/lib/bio-ucsc/aplcal1/chaincaerem2link.rb +11 -8
  27. data/lib/bio-ucsc/aplcal1/chaincb3.rb +11 -8
  28. data/lib/bio-ucsc/aplcal1/chaincb3link.rb +11 -8
  29. data/lib/bio-ucsc/aplcal1/chaince6.rb +11 -8
  30. data/lib/bio-ucsc/aplcal1/chaince6link.rb +11 -8
  31. data/lib/bio-ucsc/aplcal1/gap.rb +11 -8
  32. data/lib/bio-ucsc/aplcal1/gold.rb +11 -8
  33. data/lib/bio-ucsc/aplcal1/rmsk.rb +10 -7
  34. data/lib/bio-ucsc/bostau4.rb +5 -3
  35. data/lib/bio-ucsc/braflo1.rb +5 -3
  36. data/lib/bio-ucsc/braflo1/chaingalgal3.rb +11 -8
  37. data/lib/bio-ucsc/braflo1/chaingalgal3link.rb +11 -8
  38. data/lib/bio-ucsc/braflo1/chainhg18.rb +11 -8
  39. data/lib/bio-ucsc/braflo1/chainhg18link.rb +11 -8
  40. data/lib/bio-ucsc/braflo1/chainmm9.rb +11 -8
  41. data/lib/bio-ucsc/braflo1/chainmm9link.rb +11 -8
  42. data/lib/bio-ucsc/braflo1/chainpetmar1.rb +11 -8
  43. data/lib/bio-ucsc/braflo1/chainpetmar1link.rb +11 -8
  44. data/lib/bio-ucsc/braflo1/gap.rb +11 -8
  45. data/lib/bio-ucsc/braflo1/gold.rb +11 -8
  46. data/lib/bio-ucsc/caejap1.rb +5 -3
  47. data/lib/bio-ucsc/caejap1/chaince6.rb +11 -8
  48. data/lib/bio-ucsc/caejap1/chaince6link.rb +11 -8
  49. data/lib/bio-ucsc/caejap1/gap.rb +11 -8
  50. data/lib/bio-ucsc/caejap1/gold.rb +11 -8
  51. data/lib/bio-ucsc/caepb2.rb +6 -4
  52. data/lib/bio-ucsc/caepb2/chaince6.rb +11 -8
  53. data/lib/bio-ucsc/caepb2/chaince6link.rb +11 -8
  54. data/lib/bio-ucsc/caepb2/gap.rb +11 -8
  55. data/lib/bio-ucsc/caepb2/gold.rb +11 -8
  56. data/lib/bio-ucsc/caerem3.rb +5 -3
  57. data/lib/bio-ucsc/caerem3/chaince6.rb +11 -8
  58. data/lib/bio-ucsc/caerem3/chaince6link.rb +11 -8
  59. data/lib/bio-ucsc/caerem3/gap.rb +11 -8
  60. data/lib/bio-ucsc/caerem3/gold.rb +11 -8
  61. data/lib/bio-ucsc/caljac3.rb +7 -4
  62. data/lib/bio-ucsc/canfam2.rb +5 -3
  63. data/lib/bio-ucsc/canfam2/chainbostau4.rb +11 -8
  64. data/lib/bio-ucsc/canfam2/chainbostau4link.rb +11 -8
  65. data/lib/bio-ucsc/canfam2/chainmm9.rb +11 -8
  66. data/lib/bio-ucsc/canfam2/chainmm9link.rb +11 -8
  67. data/lib/bio-ucsc/canfam2/chainrn4.rb +11 -8
  68. data/lib/bio-ucsc/canfam2/chainrn4link.rb +11 -8
  69. data/lib/bio-ucsc/canfam2/chainself.rb +11 -8
  70. data/lib/bio-ucsc/canfam2/chainselflink.rb +11 -8
  71. data/lib/bio-ucsc/canfam2/est.rb +11 -8
  72. data/lib/bio-ucsc/canfam2/gap.rb +11 -8
  73. data/lib/bio-ucsc/canfam2/gold.rb +11 -8
  74. data/lib/bio-ucsc/canfam2/intronest.rb +11 -8
  75. data/lib/bio-ucsc/canfam2/mrna.rb +11 -8
  76. data/lib/bio-ucsc/canfam2/rmsk.rb +10 -7
  77. data/lib/bio-ucsc/cavpor3.rb +6 -4
  78. data/lib/bio-ucsc/cb3.rb +5 -3
  79. data/lib/bio-ucsc/cb3/chaincaepb1.rb +11 -8
  80. data/lib/bio-ucsc/cb3/chaincaepb1link.rb +11 -8
  81. data/lib/bio-ucsc/cb3/chaincaerem2.rb +11 -8
  82. data/lib/bio-ucsc/cb3/chaincaerem2link.rb +11 -8
  83. data/lib/bio-ucsc/cb3/chaince6.rb +11 -8
  84. data/lib/bio-ucsc/cb3/chaince6link.rb +11 -8
  85. data/lib/bio-ucsc/cb3/chainpripac1.rb +11 -8
  86. data/lib/bio-ucsc/cb3/chainpripac1link.rb +11 -8
  87. data/lib/bio-ucsc/cb3/est.rb +11 -8
  88. data/lib/bio-ucsc/cb3/gap.rb +11 -8
  89. data/lib/bio-ucsc/cb3/gold.rb +11 -8
  90. data/lib/bio-ucsc/cb3/intronest.rb +11 -8
  91. data/lib/bio-ucsc/cb3/mrna.rb +11 -8
  92. data/lib/bio-ucsc/cb3/rmsk.rb +10 -7
  93. data/lib/bio-ucsc/ce6.rb +8 -4
  94. data/lib/bio-ucsc/ce6/chaincaejap1.rb +11 -8
  95. data/lib/bio-ucsc/ce6/chaincaejap1link.rb +11 -8
  96. data/lib/bio-ucsc/ce6/chaincaepb2.rb +11 -8
  97. data/lib/bio-ucsc/ce6/chaincaepb2link.rb +11 -8
  98. data/lib/bio-ucsc/ce6/chaincaerem3.rb +11 -8
  99. data/lib/bio-ucsc/ce6/chaincaerem3link.rb +11 -8
  100. data/lib/bio-ucsc/ce6/chaincb3.rb +11 -8
  101. data/lib/bio-ucsc/ce6/chaincb3link.rb +11 -8
  102. data/lib/bio-ucsc/ce6/chainpripac1.rb +11 -8
  103. data/lib/bio-ucsc/ce6/chainpripac1link.rb +11 -8
  104. data/lib/bio-ucsc/ce6/chainself.rb +11 -8
  105. data/lib/bio-ucsc/ce6/chainselflink.rb +11 -8
  106. data/lib/bio-ucsc/ce6/est.rb +11 -8
  107. data/lib/bio-ucsc/ce6/gap.rb +11 -8
  108. data/lib/bio-ucsc/ce6/gold.rb +11 -8
  109. data/lib/bio-ucsc/ce6/intronest.rb +11 -8
  110. data/lib/bio-ucsc/ce6/mrna.rb +11 -8
  111. data/lib/bio-ucsc/ce6/rmsk.rb +11 -8
  112. data/lib/bio-ucsc/ce6/t25mersrepeats.rb +7 -4
  113. data/lib/bio-ucsc/ci2.rb +6 -3
  114. data/lib/bio-ucsc/danrer7.rb +6 -3
  115. data/lib/bio-ucsc/{apimel2/db_connection.rb → db_connector.rb} +33 -23
  116. data/lib/bio-ucsc/dm3.rb +6 -4
  117. data/lib/bio-ucsc/dm3/chainanogam1.rb +11 -8
  118. data/lib/bio-ucsc/dm3/chainanogam1link.rb +11 -8
  119. data/lib/bio-ucsc/dm3/chainapimel3.rb +11 -8
  120. data/lib/bio-ucsc/dm3/chainapimel3link.rb +11 -8
  121. data/lib/bio-ucsc/dm3/chaindp3.rb +11 -8
  122. data/lib/bio-ucsc/dm3/chaindp3link.rb +11 -8
  123. data/lib/bio-ucsc/dm3/chaindroana2.rb +11 -8
  124. data/lib/bio-ucsc/dm3/chaindroana2link.rb +11 -8
  125. data/lib/bio-ucsc/dm3/chaindroere1.rb +11 -8
  126. data/lib/bio-ucsc/dm3/chaindroere1link.rb +11 -8
  127. data/lib/bio-ucsc/dm3/chaindroper1.rb +11 -8
  128. data/lib/bio-ucsc/dm3/chaindroper1link.rb +11 -8
  129. data/lib/bio-ucsc/dm3/chaindrosec1.rb +11 -8
  130. data/lib/bio-ucsc/dm3/chaindrosec1link.rb +11 -8
  131. data/lib/bio-ucsc/dm3/chaindrosim1.rb +11 -8
  132. data/lib/bio-ucsc/dm3/chaindrosim1link.rb +11 -8
  133. data/lib/bio-ucsc/dm3/chaindroyak2.rb +11 -8
  134. data/lib/bio-ucsc/dm3/chaindroyak2link.rb +11 -8
  135. data/lib/bio-ucsc/dm3/est.rb +11 -8
  136. data/lib/bio-ucsc/dm3/gap.rb +11 -8
  137. data/lib/bio-ucsc/dm3/gold.rb +11 -8
  138. data/lib/bio-ucsc/dm3/intronest.rb +11 -8
  139. data/lib/bio-ucsc/dm3/mrna.rb +11 -8
  140. data/lib/bio-ucsc/dm3/rmsk.rb +11 -8
  141. data/lib/bio-ucsc/dp3.rb +6 -4
  142. data/lib/bio-ucsc/dp3/chaindm3.rb +11 -8
  143. data/lib/bio-ucsc/dp3/chaindm3link.rb +11 -8
  144. data/lib/bio-ucsc/dp3/est.rb +11 -8
  145. data/lib/bio-ucsc/dp3/gap.rb +11 -8
  146. data/lib/bio-ucsc/dp3/gold.rb +11 -8
  147. data/lib/bio-ucsc/dp3/intronest.rb +11 -8
  148. data/lib/bio-ucsc/dp3/mrna.rb +11 -8
  149. data/lib/bio-ucsc/dp3/rmsk.rb +10 -7
  150. data/lib/bio-ucsc/droana2.rb +6 -4
  151. data/lib/bio-ucsc/droere1.rb +6 -4
  152. data/lib/bio-ucsc/drogri1.rb +6 -4
  153. data/lib/bio-ucsc/dromoj2.rb +7 -5
  154. data/lib/bio-ucsc/droper1.rb +7 -5
  155. data/lib/bio-ucsc/drosec1.rb +6 -4
  156. data/lib/bio-ucsc/drosim1.rb +6 -4
  157. data/lib/bio-ucsc/drosim1/chaindm3.rb +11 -8
  158. data/lib/bio-ucsc/drosim1/chaindm3link.rb +11 -8
  159. data/lib/bio-ucsc/drosim1/est.rb +11 -8
  160. data/lib/bio-ucsc/drosim1/gap.rb +11 -8
  161. data/lib/bio-ucsc/drosim1/gold.rb +11 -8
  162. data/lib/bio-ucsc/drosim1/intronest.rb +11 -8
  163. data/lib/bio-ucsc/drosim1/mrna.rb +11 -8
  164. data/lib/bio-ucsc/drosim1/rmsk.rb +10 -7
  165. data/lib/bio-ucsc/drovir2.rb +6 -4
  166. data/lib/bio-ucsc/droyak2.rb +6 -4
  167. data/lib/bio-ucsc/droyak2/chaindm3.rb +11 -8
  168. data/lib/bio-ucsc/droyak2/chaindm3link.rb +11 -8
  169. data/lib/bio-ucsc/droyak2/est.rb +11 -8
  170. data/lib/bio-ucsc/droyak2/gap.rb +11 -8
  171. data/lib/bio-ucsc/droyak2/gold.rb +11 -8
  172. data/lib/bio-ucsc/droyak2/intronest.rb +11 -8
  173. data/lib/bio-ucsc/droyak2/mrna.rb +11 -8
  174. data/lib/bio-ucsc/droyak2/rmsk.rb +10 -7
  175. data/lib/bio-ucsc/equcab2.rb +7 -4
  176. data/lib/bio-ucsc/equcab2/est.rb +11 -8
  177. data/lib/bio-ucsc/equcab2/gap.rb +11 -8
  178. data/lib/bio-ucsc/equcab2/gold.rb +11 -8
  179. data/lib/bio-ucsc/equcab2/intronest.rb +11 -8
  180. data/lib/bio-ucsc/equcab2/mrna.rb +11 -8
  181. data/lib/bio-ucsc/equcab2/rmsk.rb +10 -7
  182. data/lib/bio-ucsc/felcat4.rb +7 -4
  183. data/lib/bio-ucsc/file.rb +10 -0
  184. data/lib/bio-ucsc/file/bytequeue.rb +28 -0
  185. data/lib/bio-ucsc/file/twobit.rb +174 -0
  186. data/lib/bio-ucsc/fr2.rb +5 -3
  187. data/lib/bio-ucsc/fr2/chaingalgal3.rb +11 -8
  188. data/lib/bio-ucsc/fr2/chaingalgal3link.rb +11 -8
  189. data/lib/bio-ucsc/fr2/chaingasacu1.rb +11 -8
  190. data/lib/bio-ucsc/fr2/chaingasacu1link.rb +11 -8
  191. data/lib/bio-ucsc/fr2/chainmm9.rb +11 -8
  192. data/lib/bio-ucsc/fr2/chainmm9link.rb +11 -8
  193. data/lib/bio-ucsc/fr2/chainorylat2.rb +11 -8
  194. data/lib/bio-ucsc/fr2/chainorylat2link.rb +11 -8
  195. data/lib/bio-ucsc/fr2/est.rb +11 -8
  196. data/lib/bio-ucsc/fr2/gap.rb +11 -8
  197. data/lib/bio-ucsc/fr2/gold.rb +11 -8
  198. data/lib/bio-ucsc/fr2/intronest.rb +11 -8
  199. data/lib/bio-ucsc/fr2/mrna.rb +11 -8
  200. data/lib/bio-ucsc/fr2/rmsk.rb +10 -7
  201. data/lib/bio-ucsc/galgal3.rb +8 -5
  202. data/lib/bio-ucsc/galgal3/chainanocar1.rb +11 -8
  203. data/lib/bio-ucsc/galgal3/chainanocar1link.rb +11 -8
  204. data/lib/bio-ucsc/galgal3/chainbraflo1.rb +11 -8
  205. data/lib/bio-ucsc/galgal3/chainbraflo1link.rb +11 -8
  206. data/lib/bio-ucsc/galgal3/chaincavpor3.rb +11 -8
  207. data/lib/bio-ucsc/galgal3/chaincavpor3link.rb +11 -8
  208. data/lib/bio-ucsc/galgal3/chaindanrer4.rb +11 -8
  209. data/lib/bio-ucsc/galgal3/chaindanrer4link.rb +11 -8
  210. data/lib/bio-ucsc/galgal3/chainfr2.rb +11 -8
  211. data/lib/bio-ucsc/galgal3/chainfr2link.rb +11 -8
  212. data/lib/bio-ucsc/galgal3/chaingasacu1.rb +11 -8
  213. data/lib/bio-ucsc/galgal3/chaingasacu1link.rb +11 -8
  214. data/lib/bio-ucsc/galgal3/chainmm9.rb +11 -8
  215. data/lib/bio-ucsc/galgal3/chainmm9link.rb +11 -8
  216. data/lib/bio-ucsc/galgal3/chainornana1.rb +11 -8
  217. data/lib/bio-ucsc/galgal3/chainornana1link.rb +11 -8
  218. data/lib/bio-ucsc/galgal3/chainpetmar1.rb +11 -8
  219. data/lib/bio-ucsc/galgal3/chainpetmar1link.rb +11 -8
  220. data/lib/bio-ucsc/galgal3/chainrn4.rb +11 -8
  221. data/lib/bio-ucsc/galgal3/chainrn4link.rb +11 -8
  222. data/lib/bio-ucsc/galgal3/chaintaegut1.rb +11 -8
  223. data/lib/bio-ucsc/galgal3/chaintaegut1link.rb +11 -8
  224. data/lib/bio-ucsc/galgal3/chainxentro2.rb +11 -8
  225. data/lib/bio-ucsc/galgal3/chainxentro2link.rb +11 -8
  226. data/lib/bio-ucsc/galgal3/est.rb +11 -8
  227. data/lib/bio-ucsc/galgal3/gap.rb +11 -8
  228. data/lib/bio-ucsc/galgal3/gold.rb +11 -8
  229. data/lib/bio-ucsc/galgal3/intronest.rb +11 -8
  230. data/lib/bio-ucsc/galgal3/mrna.rb +11 -8
  231. data/lib/bio-ucsc/galgal3/rmsk.rb +10 -7
  232. data/lib/bio-ucsc/gasacu1.rb +6 -4
  233. data/lib/bio-ucsc/gasacu1/chainanocar1.rb +11 -8
  234. data/lib/bio-ucsc/gasacu1/chainanocar1link.rb +11 -8
  235. data/lib/bio-ucsc/gasacu1/chainfr2.rb +11 -8
  236. data/lib/bio-ucsc/gasacu1/chainfr2link.rb +11 -8
  237. data/lib/bio-ucsc/gasacu1/chaingalgal3.rb +11 -8
  238. data/lib/bio-ucsc/gasacu1/chaingalgal3link.rb +11 -8
  239. data/lib/bio-ucsc/gasacu1/chainmm9.rb +11 -8
  240. data/lib/bio-ucsc/gasacu1/chainmm9link.rb +11 -8
  241. data/lib/bio-ucsc/gasacu1/chainorylat2.rb +11 -8
  242. data/lib/bio-ucsc/gasacu1/chainorylat2link.rb +11 -8
  243. data/lib/bio-ucsc/gasacu1/est.rb +11 -8
  244. data/lib/bio-ucsc/gasacu1/intronest.rb +11 -8
  245. data/lib/bio-ucsc/gasacu1/mrna.rb +11 -8
  246. data/lib/bio-ucsc/gasacu1/rmsk.rb +10 -7
  247. data/lib/bio-ucsc/gi.rb +27 -0
  248. data/lib/bio-ucsc/go.rb +7 -4
  249. data/lib/bio-ucsc/hg18.rb +7 -4
  250. data/lib/bio-ucsc/hg18/chainanocar1.rb +11 -8
  251. data/lib/bio-ucsc/hg18/chainbostau4.rb +11 -8
  252. data/lib/bio-ucsc/hg18/chainbraflo1.rb +11 -8
  253. data/lib/bio-ucsc/hg18/chaincaljac1.rb +11 -8
  254. data/lib/bio-ucsc/hg18/chaincanfam2.rb +11 -8
  255. data/lib/bio-ucsc/hg18/chaincavpor3.rb +11 -8
  256. data/lib/bio-ucsc/hg18/chaindanrer5.rb +11 -8
  257. data/lib/bio-ucsc/hg18/chainequcab1.rb +11 -8
  258. data/lib/bio-ucsc/hg18/chainfelcat3.rb +11 -8
  259. data/lib/bio-ucsc/hg18/chainfr2.rb +12 -9
  260. data/lib/bio-ucsc/hg18/chaingalgal3.rb +11 -8
  261. data/lib/bio-ucsc/hg18/chaingasacu1.rb +11 -8
  262. data/lib/bio-ucsc/hg18/chainmm9.rb +11 -8
  263. data/lib/bio-ucsc/hg18/chainmondom4.rb +11 -8
  264. data/lib/bio-ucsc/hg18/chainornana1.rb +11 -8
  265. data/lib/bio-ucsc/hg18/chainorylat2.rb +11 -8
  266. data/lib/bio-ucsc/hg18/chainpantro2.rb +11 -8
  267. data/lib/bio-ucsc/hg18/chainpetmar1.rb +11 -8
  268. data/lib/bio-ucsc/hg18/chainponabe2.rb +11 -8
  269. data/lib/bio-ucsc/hg18/chainrhemac2.rb +11 -8
  270. data/lib/bio-ucsc/hg18/chainrn4.rb +11 -8
  271. data/lib/bio-ucsc/hg18/chainself.rb +11 -8
  272. data/lib/bio-ucsc/hg18/chainstrpur2.rb +11 -8
  273. data/lib/bio-ucsc/hg18/chaintaegut1.rb +11 -8
  274. data/lib/bio-ucsc/hg18/chaintetnig2.rb +11 -8
  275. data/lib/bio-ucsc/hg18/chainxentro2.rb +11 -8
  276. data/lib/bio-ucsc/hg18/gap.rb +11 -8
  277. data/lib/bio-ucsc/hg18/gold.rb +11 -8
  278. data/lib/bio-ucsc/hg18/intronest.rb +11 -8
  279. data/lib/bio-ucsc/hg18/rmsk.rb +10 -7
  280. data/lib/bio-ucsc/hg18/rmskrm327.rb +10 -7
  281. data/lib/bio-ucsc/hg19.rb +7 -6
  282. data/lib/bio-ucsc/hg19/description.rb +1 -1
  283. data/lib/bio-ucsc/hg19/gbcdnainfo.rb +2 -2
  284. data/lib/bio-ucsc/hgfixed.rb +7 -4
  285. data/lib/bio-ucsc/loxafr3.rb +7 -4
  286. data/lib/bio-ucsc/mm9.rb +7 -12
  287. data/lib/bio-ucsc/mm9/chainbraflo1.rb +11 -8
  288. data/lib/bio-ucsc/mm9/chainbraflo1link.rb +11 -8
  289. data/lib/bio-ucsc/mm9/chaincanfam2.rb +11 -8
  290. data/lib/bio-ucsc/mm9/chaincanfam2link.rb +11 -8
  291. data/lib/bio-ucsc/mm9/chaincavpor3.rb +11 -8
  292. data/lib/bio-ucsc/mm9/chaincavpor3link.rb +11 -8
  293. data/lib/bio-ucsc/mm9/chainfr2.rb +11 -8
  294. data/lib/bio-ucsc/mm9/chainfr2link.rb +11 -8
  295. data/lib/bio-ucsc/mm9/chaingalgal3.rb +11 -8
  296. data/lib/bio-ucsc/mm9/chaingalgal3link.rb +11 -8
  297. data/lib/bio-ucsc/mm9/chaingasacu1.rb +11 -8
  298. data/lib/bio-ucsc/mm9/chaingasacu1link.rb +11 -8
  299. data/lib/bio-ucsc/mm9/chainornana1.rb +11 -8
  300. data/lib/bio-ucsc/mm9/chainornana1link.rb +11 -8
  301. data/lib/bio-ucsc/mm9/chainorylat2.rb +11 -8
  302. data/lib/bio-ucsc/mm9/chainorylat2link.rb +11 -8
  303. data/lib/bio-ucsc/mm9/chainpetmar1.rb +11 -8
  304. data/lib/bio-ucsc/mm9/chainpetmar1link.rb +11 -8
  305. data/lib/bio-ucsc/mm9/chainponabe2.rb +11 -8
  306. data/lib/bio-ucsc/mm9/chainponabe2link.rb +11 -8
  307. data/lib/bio-ucsc/mm9/chainrhemac2.rb +11 -8
  308. data/lib/bio-ucsc/mm9/chainrhemac2link.rb +11 -8
  309. data/lib/bio-ucsc/mm9/chainrn4.rb +11 -8
  310. data/lib/bio-ucsc/mm9/chainrn4link.rb +11 -8
  311. data/lib/bio-ucsc/mm9/est.rb +11 -8
  312. data/lib/bio-ucsc/mm9/gap.rb +11 -8
  313. data/lib/bio-ucsc/mm9/gold.rb +11 -8
  314. data/lib/bio-ucsc/mm9/intronest.rb +11 -8
  315. data/lib/bio-ucsc/mm9/mrna.rb +11 -8
  316. data/lib/bio-ucsc/mm9/rmsk.rb +10 -7
  317. data/lib/bio-ucsc/mondom5.rb +7 -4
  318. data/lib/bio-ucsc/ornana1.rb +6 -3
  319. data/lib/bio-ucsc/orycun2.rb +5 -3
  320. data/lib/bio-ucsc/orylat2.rb +7 -5
  321. data/lib/bio-ucsc/oviari1.rb +6 -4
  322. data/lib/bio-ucsc/pantro3.rb +7 -5
  323. data/lib/bio-ucsc/petmar1.rb +6 -5
  324. data/lib/bio-ucsc/ponabe2.rb +6 -4
  325. data/lib/bio-ucsc/ponabe2/chainmm9.rb +11 -8
  326. data/lib/bio-ucsc/ponabe2/chainmm9link.rb +11 -8
  327. data/lib/bio-ucsc/ponabe2/chainornana1.rb +11 -8
  328. data/lib/bio-ucsc/ponabe2/chainornana1link.rb +11 -8
  329. data/lib/bio-ucsc/ponabe2/chainrhemac2.rb +11 -8
  330. data/lib/bio-ucsc/ponabe2/chainrhemac2link.rb +11 -8
  331. data/lib/bio-ucsc/ponabe2/est.rb +11 -8
  332. data/lib/bio-ucsc/ponabe2/gap.rb +11 -8
  333. data/lib/bio-ucsc/ponabe2/gold.rb +11 -8
  334. data/lib/bio-ucsc/ponabe2/intronest.rb +11 -8
  335. data/lib/bio-ucsc/ponabe2/mrna.rb +11 -8
  336. data/lib/bio-ucsc/ponabe2/rmsk.rb +10 -7
  337. data/lib/bio-ucsc/pripac1.rb +7 -5
  338. data/lib/bio-ucsc/pripac1/chaincaepb1.rb +11 -8
  339. data/lib/bio-ucsc/pripac1/chaincaepb1link.rb +11 -8
  340. data/lib/bio-ucsc/pripac1/chaincaerem2.rb +11 -8
  341. data/lib/bio-ucsc/pripac1/chaincaerem2link.rb +11 -8
  342. data/lib/bio-ucsc/pripac1/chaincb3.rb +11 -8
  343. data/lib/bio-ucsc/pripac1/chaincb3link.rb +11 -8
  344. data/lib/bio-ucsc/pripac1/chaince6.rb +11 -8
  345. data/lib/bio-ucsc/pripac1/chaince6link.rb +11 -8
  346. data/lib/bio-ucsc/pripac1/gap.rb +11 -8
  347. data/lib/bio-ucsc/pripac1/gold.rb +11 -8
  348. data/lib/bio-ucsc/pripac1/rmsk.rb +10 -7
  349. data/lib/bio-ucsc/proteome.rb +7 -4
  350. data/lib/bio-ucsc/reference.rb +5 -160
  351. data/lib/bio-ucsc/rhemac2.rb +5 -5
  352. data/lib/bio-ucsc/rhemac2/chainmm9.rb +11 -8
  353. data/lib/bio-ucsc/rhemac2/chainmm9link.rb +11 -8
  354. data/lib/bio-ucsc/rhemac2/chainponabe2.rb +11 -8
  355. data/lib/bio-ucsc/rhemac2/chainponabe2link.rb +11 -8
  356. data/lib/bio-ucsc/rhemac2/chainrn4.rb +11 -8
  357. data/lib/bio-ucsc/rhemac2/chainrn4link.rb +11 -8
  358. data/lib/bio-ucsc/rn4.rb +6 -8
  359. data/lib/bio-ucsc/rn4/chaincanfam2.rb +11 -8
  360. data/lib/bio-ucsc/rn4/chaincanfam2link.rb +11 -8
  361. data/lib/bio-ucsc/rn4/chaincavpor3.rb +11 -8
  362. data/lib/bio-ucsc/rn4/chaincavpor3link.rb +11 -8
  363. data/lib/bio-ucsc/rn4/chaindanrer4.rb +11 -8
  364. data/lib/bio-ucsc/rn4/chaindanrer4link.rb +11 -8
  365. data/lib/bio-ucsc/rn4/chainequcab1.rb +11 -8
  366. data/lib/bio-ucsc/rn4/chainequcab1link.rb +11 -8
  367. data/lib/bio-ucsc/rn4/chaingalgal3.rb +11 -8
  368. data/lib/bio-ucsc/rn4/chaingalgal3link.rb +11 -8
  369. data/lib/bio-ucsc/rn4/chainmm9.rb +11 -8
  370. data/lib/bio-ucsc/rn4/chainmm9link.rb +11 -8
  371. data/lib/bio-ucsc/rn4/chainmondom4.rb +11 -8
  372. data/lib/bio-ucsc/rn4/chainmondom4link.rb +11 -8
  373. data/lib/bio-ucsc/rn4/chainpantro2.rb +11 -8
  374. data/lib/bio-ucsc/rn4/chainpantro2link.rb +11 -8
  375. data/lib/bio-ucsc/rn4/chainrhemac2.rb +11 -8
  376. data/lib/bio-ucsc/rn4/chainrhemac2link.rb +11 -8
  377. data/lib/bio-ucsc/rn4/est.rb +11 -8
  378. data/lib/bio-ucsc/rn4/gap.rb +11 -8
  379. data/lib/bio-ucsc/rn4/gold.rb +11 -8
  380. data/lib/bio-ucsc/rn4/intronest.rb +11 -8
  381. data/lib/bio-ucsc/rn4/mrna.rb +11 -8
  382. data/lib/bio-ucsc/rn4/rmsk.rb +10 -7
  383. data/lib/bio-ucsc/saccer2.rb +6 -4
  384. data/lib/bio-ucsc/saccer2/est.rb +11 -8
  385. data/lib/bio-ucsc/saccer2/gap.rb +11 -8
  386. data/lib/bio-ucsc/saccer2/gold.rb +11 -8
  387. data/lib/bio-ucsc/saccer2/intronest.rb +11 -8
  388. data/lib/bio-ucsc/saccer2/mrna.rb +11 -8
  389. data/lib/bio-ucsc/saccer2/t2micron_est.rb +4 -4
  390. data/lib/bio-ucsc/saccer2/t2micron_gap.rb +4 -4
  391. data/lib/bio-ucsc/saccer2/t2micron_gold.rb +4 -4
  392. data/lib/bio-ucsc/saccer2/t2micron_intronEst.rb +4 -4
  393. data/lib/bio-ucsc/saccer2/t2micron_mrna.rb +4 -4
  394. data/lib/bio-ucsc/strpur2.rb +7 -15
  395. data/lib/bio-ucsc/susscr2.rb +6 -4
  396. data/lib/bio-ucsc/susscr2/est.rb +11 -8
  397. data/lib/bio-ucsc/susscr2/intronest.rb +11 -8
  398. data/lib/bio-ucsc/susscr2/mrna.rb +11 -8
  399. data/lib/bio-ucsc/table_class_detector.rb +250 -475
  400. data/lib/bio-ucsc/taegut1.rb +6 -4
  401. data/lib/bio-ucsc/taegut1/chaingalgal3.rb +14 -13
  402. data/lib/bio-ucsc/taegut1/chaingalgal3link.rb +13 -10
  403. data/lib/bio-ucsc/taegut1/est.rb +13 -10
  404. data/lib/bio-ucsc/taegut1/gap.rb +13 -10
  405. data/lib/bio-ucsc/taegut1/gold.rb +13 -10
  406. data/lib/bio-ucsc/taegut1/intronest.rb +13 -10
  407. data/lib/bio-ucsc/taegut1/mrna.rb +13 -10
  408. data/lib/bio-ucsc/taegut1/rmsk.rb +12 -9
  409. data/lib/bio-ucsc/tetnig2.rb +6 -4
  410. data/lib/bio-ucsc/tetnig2/mrna.rb +11 -8
  411. data/lib/bio-ucsc/uniprot.rb +7 -4
  412. data/lib/bio-ucsc/visigene.rb +6 -3
  413. data/lib/bio-ucsc/xentro2.rb +6 -12
  414. data/samples/num-gene-exon.rb +1 -2
  415. data/samples/snp2genes.rb +1 -7
  416. data/samples/symbol2summary.rb +7 -1
  417. data/spec/ailmel1_spec.rb +245 -245
  418. data/spec/aplcal1_spec.rb +0 -30
  419. data/spec/braflo1_spec.rb +1 -1
  420. data/spec/canfam2_spec.rb +6 -6
  421. data/spec/danrer7_spec.rb +6 -6
  422. data/spec/file/twobit.rb +167 -0
  423. data/spec/galgal3_spec.rb +12 -12
  424. data/spec/gasacu1_spec.rb +6 -6
  425. data/spec/hg18/find_by_spec.rb +14 -20
  426. data/spec/hg19/all_mrna_spec.rb +0 -8
  427. data/spec/hg19/altseqliftoverpsl_spec.rb +0 -8
  428. data/spec/hg19/altseqpatches_spec.rb +0 -8
  429. data/spec/hg19/{chainanocar1_spec.rb → chainanocar2_spec.rb} +2 -2
  430. data/spec/hg19/{chainbostau4_spec.rb → chainbostau6_spec.rb} +2 -2
  431. data/spec/hg19/{chainxentro2_spec.rb → chainxentro3_spec.rb} +2 -2
  432. data/spec/hg19/connect_spec.rb +20 -0
  433. data/spec/hg19/find_by_spec.rb +14 -14
  434. data/spec/hg19/gwascatalog_spec.rb +0 -7
  435. data/spec/hg19/hgikmcextra_spec.rb +2 -1
  436. data/spec/hg19/mrnaorinetinfo_spec.rb +0 -8
  437. data/spec/hg19/{netanocar1_spec.rb → netanocar2_spec.rb} +2 -2
  438. data/spec/hg19/{netbostau4_spec.rb → netbostau6_spec.rb} +2 -2
  439. data/spec/hg19/{netxentro2_spec.rb → netxentro3_spec.rb} +2 -2
  440. data/spec/hg19/sibtxgraph_spec.rb +0 -8
  441. data/spec/hg19/snp132_spec.rb +9 -0
  442. data/spec/hg19/xenorefflat_spec.rb +0 -8
  443. data/spec/hg19/xenorefgene_spec.rb +0 -8
  444. data/spec/hg19/xenorefseqali_spec.rb +0 -8
  445. data/spec/mm9_spec.rb +32 -32
  446. data/spec/ornana1_spec.rb +12 -12
  447. data/spec/{named_scope_spec.rb → relation_objects_spec.rb} +14 -0
  448. data/spec/rhemac2_spec.rb +16 -6
  449. data/spec/rn4_spec.rb +12 -12
  450. data/spec/spec_helper.rb +3 -1
  451. data/spec/susscr2_spec.rb +6 -6
  452. data/spec/taegut1_spec.rb +8 -8
  453. data/spec/xentro2_spec.rb +6 -6
  454. metadata +63 -138
  455. data/README.rdoc +0 -202
  456. data/lib/bio-ucsc/ailmel1/db_connection.rb +0 -57
  457. data/lib/bio-ucsc/anocar2/db_connection.rb +0 -57
  458. data/lib/bio-ucsc/anogam1/db_connection.rb +0 -57
  459. data/lib/bio-ucsc/aplcal1/db_connection.rb +0 -57
  460. data/lib/bio-ucsc/bostau4/db_connection.rb +0 -57
  461. data/lib/bio-ucsc/braflo1/db_connection.rb +0 -57
  462. data/lib/bio-ucsc/caejap1/db_connection.rb +0 -57
  463. data/lib/bio-ucsc/caepb2/db_connection.rb +0 -57
  464. data/lib/bio-ucsc/caerem3/db_connection.rb +0 -57
  465. data/lib/bio-ucsc/caljac3/db_connection.rb +0 -55
  466. data/lib/bio-ucsc/canfam2/db_connection.rb +0 -57
  467. data/lib/bio-ucsc/cavpor3/db_connection.rb +0 -57
  468. data/lib/bio-ucsc/cb3/db_connection.rb +0 -57
  469. data/lib/bio-ucsc/ce6/db_connection.rb +0 -55
  470. data/lib/bio-ucsc/ci2/db_connection.rb +0 -55
  471. data/lib/bio-ucsc/danrer7/db_connection.rb +0 -57
  472. data/lib/bio-ucsc/dm3/db_connection.rb +0 -55
  473. data/lib/bio-ucsc/dp3/db_connection.rb +0 -57
  474. data/lib/bio-ucsc/droana2/db_connection.rb +0 -57
  475. data/lib/bio-ucsc/droere1/db_connection.rb +0 -57
  476. data/lib/bio-ucsc/drogri1/db_connection.rb +0 -57
  477. data/lib/bio-ucsc/dromoj2/db_connection.rb +0 -57
  478. data/lib/bio-ucsc/droper1/db_connection.rb +0 -57
  479. data/lib/bio-ucsc/drosec1/db_connection.rb +0 -57
  480. data/lib/bio-ucsc/drosim1/db_connection.rb +0 -57
  481. data/lib/bio-ucsc/drovir2/db_connection.rb +0 -57
  482. data/lib/bio-ucsc/droyak2/db_connection.rb +0 -57
  483. data/lib/bio-ucsc/equcab2/db_connection.rb +0 -57
  484. data/lib/bio-ucsc/felcat4/db_connection.rb +0 -57
  485. data/lib/bio-ucsc/fr2/db_connection.rb +0 -57
  486. data/lib/bio-ucsc/galgal3/db_connection.rb +0 -57
  487. data/lib/bio-ucsc/gasacu1/db_connection.rb +0 -57
  488. data/lib/bio-ucsc/go/db_connection.rb +0 -55
  489. data/lib/bio-ucsc/hg18/db_connection.rb +0 -59
  490. data/lib/bio-ucsc/hg19/db_connection.rb +0 -61
  491. data/lib/bio-ucsc/hgfixed/db_connection.rb +0 -55
  492. data/lib/bio-ucsc/loxafr3/db_connection.rb +0 -57
  493. data/lib/bio-ucsc/mm9/chainanocar1.rb +0 -79
  494. data/lib/bio-ucsc/mm9/chainanocar1link.rb +0 -79
  495. data/lib/bio-ucsc/mm9/chainbostau4.rb +0 -79
  496. data/lib/bio-ucsc/mm9/chainbostau4link.rb +0 -79
  497. data/lib/bio-ucsc/mm9/chainpantro2.rb +0 -81
  498. data/lib/bio-ucsc/mm9/chainpantro2link.rb +0 -82
  499. data/lib/bio-ucsc/mm9/chainxentro2.rb +0 -81
  500. data/lib/bio-ucsc/mm9/chainxentro2link.rb +0 -81
  501. data/lib/bio-ucsc/mm9/db_connection.rb +0 -55
  502. data/lib/bio-ucsc/mondom5/db_connection.rb +0 -57
  503. data/lib/bio-ucsc/ornana1/db_connection.rb +0 -57
  504. data/lib/bio-ucsc/orycun2/db_connection.rb +0 -57
  505. data/lib/bio-ucsc/orylat2/db_connection.rb +0 -55
  506. data/lib/bio-ucsc/oviari1/db_connection.rb +0 -57
  507. data/lib/bio-ucsc/pantro3/db_connection.rb +0 -55
  508. data/lib/bio-ucsc/petmar1/db_connection.rb +0 -57
  509. data/lib/bio-ucsc/ponabe2/db_connection.rb +0 -55
  510. data/lib/bio-ucsc/pripac1/db_connection.rb +0 -57
  511. data/lib/bio-ucsc/proteome/db_connection.rb +0 -59
  512. data/lib/bio-ucsc/rhemac2/chainpantro2.rb +0 -81
  513. data/lib/bio-ucsc/rhemac2/chainpantro2link.rb +0 -81
  514. data/lib/bio-ucsc/rhemac2/db_connection.rb +0 -55
  515. data/lib/bio-ucsc/rn4/chainbostau3.rb +0 -79
  516. data/lib/bio-ucsc/rn4/chainbostau3link.rb +0 -79
  517. data/lib/bio-ucsc/rn4/chainxentro2.rb +0 -81
  518. data/lib/bio-ucsc/rn4/chainxentro2link.rb +0 -81
  519. data/lib/bio-ucsc/rn4/db_connection.rb +0 -55
  520. data/lib/bio-ucsc/saccer2/db_connection.rb +0 -55
  521. data/lib/bio-ucsc/strpur2/chaingalgal3.rb +0 -81
  522. data/lib/bio-ucsc/strpur2/chaingalgal3link.rb +0 -81
  523. data/lib/bio-ucsc/strpur2/chainhg18.rb +0 -81
  524. data/lib/bio-ucsc/strpur2/chainhg18link.rb +0 -81
  525. data/lib/bio-ucsc/strpur2/chainmm9.rb +0 -81
  526. data/lib/bio-ucsc/strpur2/chainmm9link.rb +0 -81
  527. data/lib/bio-ucsc/strpur2/chainpetmar1.rb +0 -81
  528. data/lib/bio-ucsc/strpur2/chainpetmar1link.rb +0 -81
  529. data/lib/bio-ucsc/strpur2/db_connection.rb +0 -57
  530. data/lib/bio-ucsc/strpur2/gap.rb +0 -81
  531. data/lib/bio-ucsc/strpur2/gold.rb +0 -81
  532. data/lib/bio-ucsc/susscr2/db_connection.rb +0 -57
  533. data/lib/bio-ucsc/taegut1/db_connection.rb +0 -57
  534. data/lib/bio-ucsc/tetnig2/db_connection.rb +0 -57
  535. data/lib/bio-ucsc/uniprot/db_connection.rb +0 -59
  536. data/lib/bio-ucsc/visigene/db_connection.rb +0 -55
  537. data/lib/bio-ucsc/xentro2/chaingalgal3.rb +0 -81
  538. data/lib/bio-ucsc/xentro2/chaingalgal3link.rb +0 -81
  539. data/lib/bio-ucsc/xentro2/db_connection.rb +0 -57
  540. data/lib/bio-ucsc/xentro2/est.rb +0 -81
  541. data/lib/bio-ucsc/xentro2/gap.rb +0 -81
  542. data/lib/bio-ucsc/xentro2/gold.rb +0 -81
  543. data/lib/bio-ucsc/xentro2/intronest.rb +0 -81
  544. data/lib/bio-ucsc/xentro2/mrna.rb +0 -81
  545. data/lib/bio-ucsc/xentro2/rmsk.rb +0 -78
  546. data/spec/hg19/chainaplcal1_spec.rb +0 -16
  547. data/spec/hg19/netaplcal1_spec.rb +0 -16
@@ -1,202 +0,0 @@
1
- = bio-ucsc-api
2
-
3
- The Ruby UCSC API: accessing the UCSC Genome Database using Ruby.
4
-
5
- Your comments, suggestions and requests are welcome. Documentation and
6
- feedback are available at the UserEcho site at
7
- http://rubyucscapi.userecho.com/.
8
-
9
- == Install
10
-
11
- $ gem install bio-ucsc-api --no-ri --no-rdoc
12
-
13
- You may need to be root or use "sudo". "--no-ri" and "--no-rdoc" options are recommended because generation of ri/rdoc files takes considerable time.
14
-
15
- == Features
16
-
17
- * Supporting all organisms in the UCSC genome database.
18
- * Using ActiveRecord as an O/R mapping framework. Basically, each table can be accessed using ActiveRecord method conventions.
19
- * Using the Bin index system to improve query performance. This is one of the reasons to use Ruby UCSC API instead of submitting SQL queries directly.
20
- * Supporting genomic sequence query using locally downloaded "2bit" files. Genomic sequences are not stored in UCSC's official MySQL database.
21
- * Automatic conversion of "1-based full-closed intervals" to internal "0-based left-closed right-open intervals" (see also bioruby-genomic-interval)
22
- * Supporting non-official full/partial mirror MySQL hosts (e.g. local servers)
23
- * Using Rspec for the testing framework
24
- * Written in pure Ruby and supporting multiple Ruby interpreter implementations including Ruby1.8, Ruby1.9, and JRuby1.6
25
- * Designed as a BioRuby plugin
26
- * Current version does not support table-linked bigWIG/bigBED/BAM files.
27
-
28
- == Supported databases (genome assemblies)
29
-
30
- [human] Hg19, Hg18
31
- [mammals] chimp (PanTro3), orangutan (PonAbe2), rhesus (RheMac2), marmoset (CalJac3), mouse (Mm9), rat (Rn4), guinea pig (CavPor3), rabbit (OryCun2), cat (FelCat4), panda (AilMel1), dog (CanFam2), horse (EquCab2), pig (SusScr2), sheep (OviAri1), cow (BosTau4), elephant (LoxAfr3), opossum (MonDom5), platypus (OrnAna1)
32
- [vertebrates] chicken (GalGal3), zebra finch (TaeGut1), lizard (AnoCar2), X. tropicalis (XenTro2), zebrafish (DanRer7), tetraodon (TetNig2), fugu (Fr2), stickleback (GasAcu1), medaka (OryLat2), lamprey (PetMar1)
33
- [deuterostomes] lancelet (BraFlo1), sea squirt (Ci2), sea urchin (StrPur2)
34
- [insects] D.melanogaster (Dm3), D.simulans (DroSim1), D.sechellia (DroSec1), D.yakuba (DroYak2), D.erecta (DroEre1), D.ananassae (DroAna2), D.pseudoobscura (Dp3), D.persimilis (DroPer1), D.virilis (DroVir2), D.mojavensis (DroMoj2), D.grimshawi (DroGri1), Anopheles mosquito (AnoGam1), honey bee (ApiMel2)
35
- [nematodes] C.elegans (Ce6), C.brenneri (CaePb3), C.briggsae (Cb3), C.remanei (CaeRem3), C.japonica (CaeJap1), P.pacificus (PriPac1)
36
- [others] sea hare (AplCal1), yeast (SacCer2)
37
- [genome assembly independent] Go, HgFixed, Proteome, UniProt, VisiGene
38
-
39
- == Implementation
40
- This package is based on the following:
41
-
42
- * original ruby-ucsc-api: https://github.com/jandot/ruby-ucsc-api
43
- * ruby-ensembl-api: https://github.com/jandot/ruby-ensembl-api
44
-
45
- Supported Ruby interpreter implementations:
46
-
47
- * Ruby version 1.9.2 or later
48
- * Ruby version 1.8.7 or later
49
- * JRuby version 1.6.3 or later - An appropriate Java heap size may have to be specified when invoking JRuby, especially when you use Bio::Ucsc::Reference. Try "jruby -J-Xmx3g your_script.rb" to reserve a 3 GB heap.
50
-
51
- Major dependent gems:
52
-
53
- * active_record (version ~> 3.0.7. version 3.1 is not supported yet.) - http://api.rubyonrails.org/classes/ActiveRecord/Base.html
54
- * bioruby-genomic-interval - https://github.com/misshie/bioruby-genomic-interval
55
- * mysql (MySQL/Ruby MySQL API module) - http://www.tmtm.org/mysql/ruby/README.html
56
-
57
- See also:
58
-
59
- * Strozzi F, Aerts J: A Ruby API to query the Ensembl database for genomic features. Bioinformatics 2011, 27:1013-1014.
60
- * UCSCBin library - https://github.com/misshie/UCSCBin
61
-
62
- == Change Log
63
- * *BUG* (v.0.3.1): Does not work with ActiveRecord version 3.1.0. Data retrieval methods raise the error "(Object doesn't support #inspect)". The author is working on this bug. For now, please use the version 3.0 series. The Gemfile for gem dependencies has been updated. Thanks to Diego Pereira for the bug reports.
64
- * *BUG-FIX* (v.0.3.1): "func" fields in tables did not work. The bug was fixed.
65
- * *BUG-FIX* (v.0.3.1): PredGene-type tables without the bin index did not work. The bug was fixed.
66
- * *NEW* (v.0.3.0): Genomic interval queries are now expressed using the named scope "with_interval". Table#find_(all_)by_interval is now deprecated. Sorry for the inconsistent API. However, this change enables queries that combine genomic intervals with any other fields.
67
- * *NEW* (v.0.3.0): Bio::GenomicInterval#bin_all and Bio::GenomicInterval#bin return the bin index for the given interval.
68
- * *NEW* (v.0.3.0): Supporting JRuby 1.6.3 or later. An appropriate Java heap size may have to be specified when invoking JRuby, especially when you use Bio::Ucsc::Reference. Try "jruby -J-Xmx3g your_script.rb" to reserve a 3 GB heap.
69
- * *NEW* (v.0.2.1): New genome assemblies are supported: [chimp] PanTro3, [orangutan] PonAbe2, [rhesus] RheMac2, [marmoset] CalJac3, [rat] Rn4, [guinea pig] CavPor3, [rabbit] OryCun2, [cat] FelCat4, [panda] AilMel1, [Dog] CanFam2, [horse] EquCab2, [pig] SusScr2, [sheep] OviAri1, [cow] BosTau4, [elephant] LoxAfr3, [opossum] MonDom5, [platypus] OrnAna1, [chicken] GalGal3, [zebra finch] TaeGut1, [lizard] AnoCar2, [X. tropicalis] XenTro2, [zebrafish] DanRer7, [tetraodon] TetNig2, [fugu] Fr2, [stickleback] GasAcu1, [medaka] OryLat2, [lamprey] PetMar1, [lancelet] BraFlo1, [sea squirt] Ci2, [sea urchin] StrPur2, [D.simulans] DroSim1, [D.sechellia] DroSec1, [D.yakuba] DroYak2, [D.erecta] DroEre1, [D.ananassae] DroAna2, [D.pseudoobscura] Dp3, [D.persimilis] DroPer1, [D. virilis] DroVir2, [D.mojavensis] DroMoj2, [D.grimshawi] DroGri1, [Anopheles mosquito] AnoGam1, [honey bee] ApiMel2, [C.brenneri] CaePb3, [C.briggsae] Cb3, [C.remanei] CaeRem3, [P.pacificus] PriPac1, [sea hare] AplCal1, [yeast] SacCer2
70
- * *NEW* (v.0.2.1): Supporting Ruby 1.8.7 or later
71
- * *NEW* Adding to human Hg19 and Hg18, the following genome assemblies are supported: [mouse] Mm9, [fruitfly] Dm3, [C. elegans] Ce6, [genome assembly independent] Go, HgFixed, Proteome, UniProt, VisiGene
72
- * *UPDATE* (v0.2.0): The internal table class mapping algorithm was changed. Table types are now automatically detected and dynamically defined as classes. Previous versions used static class definitions for all tables.
73
- * *MODIFIED* (v0.2.0): Bio::Ucsc::[Hg18|Hg19]::ReferenceSequence are removed. Use Bio::Ucsc::Reference instead. This class is more object-oriented.
74
- * *MODIFIED* (v0.1.0): The name of this library is now "Ruby UCSC API". The RubyGem name and the GitHub account and the library name are not changed.
75
- * *MODIFIED* (v0.1.0): Bio::Ucsc::[Hg18|Hg19]::Reference is replaced by Bio::Ucsc::[Hg18|Hg19]::ReferenceSequence.
76
- * *UPDATE* (v0.0.5): Almost all hg18 tables are supported.
77
- * *UPDATE* (v0.0.5): find_by_interval and find_all_by_interval class methods accept the "partial" option. Default is true. When "partial: false" is opted, return value will be only fully-included (non-partially-included) records.
78
- * *UPDATE* (v0.0.4): Almost all hg19 tables are supported. "filename" tables in ENCODE dataset are omitted. Each of them contains only single record of a path to the raw data file. Definitions of table relations are incomplete.
79
- * *NEW* (v0.0.3): Supporting locally-stored '2bit' files, which can be downloaded from the UCSC site, to retrieve referential sequence. Now supporting unknown "N" nucleotide blocks, however, "mask-blocks", which are shown in lower-case in UCSC's DNA function, are not supported yet.
80
- * *MODIFIED* (v0.0.3): For the "TABLE" class and the "column" column, TABLE.find_by_column retrieves a first record, and TABLE.find_all_by_column retrieves all the records as an Array.
81
- * *NEW* (v0.0.3-0.0.4): Supporting tables divided into each chromosome, such as "*_RmsK" and "*_gold". Actual names of them are like "chr1_Rmsk", "chr2_Rmsk"... They can be accessed without chromosome names; but with just like "Rmsk" and "Gold".
82
-
83
- == How to Use
84
- === Basics
85
- * A database of a genome assembly is represented as a module in the Bio::Ucsc module. For example, human hg19 database is referred by "Bio::Ucsc::Hg19".
86
- * Before using a database, establish a connection to the database. For example, "Bio::Ucsc::Hg19::DBConnection.connect".
87
- * A table in a database is represented as a class in the database module. For example, the snp132 table in the hg19 database is referred by "Bio::Ucsc::Hg19::Snp132".
88
- * Queries to a field (column) in a table are represented by class methods of the table class. For example, finding the first record (row) of the snp132 table in the hg19 database is "Bio::Ucsc::Hg19::Snp132.first".
89
- * Queries using genomic intervals are supported by the named scopes ".with_interval" and ".with_interval_excl" (the latter omits partially included annotations) of the table class. Each scope accepts a Bio::GenomicInterval object containing a genomic interval such as "chr1:1233-5678". If the table being queried has the "bin" column, the bin index system is automatically used to speed up the query.
90
- * Fields in a retrieved record can be accessed by using instance methods of a record object. For example, the name field of a table record stored in the "result" variable is "result.name".
91
-
92
- === Sample Codes
93
- At first, you have to declare the API and establish the connection to a database.
94
- require 'bio-ucsc'
95
-
96
- include Bio # To short-cut the class path
97
- Ucsc::Hg19::DBConnection.connect
98
-
99
- In the first reference of a table class, the following does not work:
100
- include Bio::Ucsc::Hg19
101
- Snp131.first # The Ruby interpreter searches Snp131 at the top-level
102
- But the following line works because the API will fail to prefetch the table and define the appropriate class dynamically. "include Bio" or "include Bio::Ucsc" will work.
103
- Ucsc::Hg19::Snp131 # This line works
104
-
105
- Table search using genomic intervals:
106
- gi = GenomicInterval.parse("chr1:1-11,000")
107
- Ucsc::Hg19::Snp131.with_interval(gi).find(:all).each do |e|
108
- i = GenomicInterval.zero_based(e.chrom, e.chromStart, e.chromEnd)
109
- puts "#{i.chrom}\t#{i.chr_start}\t#{e.name}\t#{e[:class]}" # "e.class" does not work
110
- end
111
-
112
- gi = GenomicInterval.parse("chr17:7,579,614-7,579,700")
113
- Ucsc::Hg19::Snp131.with_interval(gi).find(:all)
114
-
115
- Ucsc::Hg19::Snp131.with_interval_excl(gi).find(:all)
116
-
117
- Ucsc::Hg19::Snp132.with_interval(gi).select(:name).find_all_by_class_and_strand("in-del", "+")
118
-
119
- Ucsc::Hg19::Snp131.find_by_name("rs56289060")
120
-
121
- Sometimes, queries using raw SQLs provide elegant solutions.
122
- sql << 'SQL'
123
- SELECT name,chrom,chromStart,chromEnd,observed
124
- FROM snp131
125
- WHERE name="rs56289060"
126
- SQL
127
- p Ucsc::Hg19::Snp131.find_by_sql(sql)
128
-
129
- retrieve reference sequence from a locally-stored 2bit file. The "hg19.2bit" file can be downloaded from http://hgdownload.cse.ucsc.edu/goldenPath/hg19/bigZips/hg19.2bit
130
- hg19ref = Ucsc::Reference.load("hg19.2bit")
131
- gi = GenomicInterval.parse("chr1:9,500-10,999")
132
- hg19ref.find_by_interval(gi)
133
-
134
- Connecting to non-official or local full/partial mirror MySQL servers
135
- Ucsc::Hg18::DBConnection.db_host = 'localhost'
136
- Ucsc::Hg18::DBConnection.db_username = 'genome'
137
- Ucsc::Hg18::DBConnection.db_password = ''
138
- Ucsc::Hg18::DBConnection.connect
139
-
140
- Ucsc::Hg18::DBConnection.default # reset to connect UCSC's public MySQL server
141
- Ucsc::Hg18::DBConnection.connect
142
-
143
- And see also sample scripts in the samples directory.
144
- * num-gene-exon.rb - calculation of total number of genes and exons using genomic interval
145
- * symbol2summary.rb - getting summary descriptions using gene symbol
146
- * hg19-2bit-retrieve - outputting reference sequence in FASTA format
147
- * bed2refseq - getting unique gene symbols in the genomic intervals in a BED file.
148
- * snp2gene - sample for retrieving fields from associated tables
149
-
150
- === Notes of Exceptions in Table Support
151
- * Table names starting with a number: Because Ruby class names cannot start with number, use the table class name starting with "T" (T for Table). Thus, the "2micron_est" table is supported by the "T2micron_est" class.
152
- * Table names starting with uppercase character: Classes for "HInv" and "NIAGene" tables are "HInv" and "NIAGene", respectively
153
- * Accessing chromosome-specific tables: For example, the 'rmsk' table in hg18 is actually separated into 'chr1_rmsk', 'chr2_rmsk'... There are two ways to access them. (1) Accessing the separated tables directly. There is no difference from other regular tables. However, you have to manage each separated table yourself. (2) Using abstract table classes (e.g., 'Rmsk') and their class methods '.find_by_interval' or '.find_all_by_interval'. These methods look for the corresponding separated tables automatically. However, you cannot combine them with other 'find_by_[field]' methods. Moreover, if you have to perform single- or multi-chromosomal search, you have to access separated tables individually and integrate results by yourself. Fortunately, recent databases, including hg19, seem not to use chromosome-specific tables.
154
- * For honey bee ApiMel2 database, Group*_chainDm2 and Group*_chainDm2Link tables are accessible using find(_all)_by_interval class methods of the ChainDm2 and ChainDm2Link classes.
155
- * Special field (column) names: Field names such as 'attribute', 'valid', 'validate', 'class', 'method', 'methods', and 'type' cannot be accessed using instance methods. This restriction is because of the collision of method names that are internally used by ActiveRecord. Instead, use hash to access the field like "result[:type]".
156
-
157
- === Details in "with_interval"
158
- * When a table class is referred first time, the API prefetches the table to get a list of fields and dynamically defines a class using following algorithm.
159
- * If chrom/chromStart/chromEnd fields exist (BED table), the API uses them for interval queries.
160
- * When tName/tStart/tEnd fields exist (PSL table), the API uses them for interval queries.
161
- * When chrom/txStart/txEnd fields exist (genePred table), the API uses them for interval queries.
162
- * When genoName/genoStart/genoEnd fields exist (RMSK table), the API uses them for interval queries.
163
- * If the table has the "bin" column, the API calculates the bin index to build a query.
164
- * Otherwise, the API does not support interval queries but support only ActiveRecord's standard methods such as "find_(all_)by_[field name]".
165
- === Table Associations
166
- See samples/snp2gene.rb. Association definition using "has_one/has_many/belongs_to" methods is shown below. class_eval is used not to replace but to add definition.
167
-
168
- Bio::Ucsc::Hg19::KnownGene.class_eval %!
169
- has_one :knownToEnsembl, {:primary_key => :name, :foreign_key => :name}
170
- !
171
- Bio::Ucsc::Hg19::KnownToEnsembl.class_eval %!
172
- belongs_to :knownGene
173
- has_one :ensGtp, {:primary_key => :value, :foreign_key => :transcript}
174
- has_one :kgXref, {:primary_key => :name, :foreign_key => :kgID}
175
- !
176
- Bio::Ucsc::Hg19::EnsGtp.class_eval %!
177
- belongs_to :knownToEnsembl
178
- !
179
- Bio::Ucsc::Hg19::KgXref.class_eval %!
180
- belongs_to :knownToEnsembl
181
- has_one :refLink, {:primary_key => :mRNA, :foreign_key => :mrnaAcc}
182
- !
183
- Bio::Ucsc::Hg19::RefLink.class_eval %!
184
- belongs_to :kgXref
185
- !
186
-
187
- ActiveRecord::Base#find can be used with the :include option to perform "eager fetching".
188
- kg = Bio::Ucsc::Hg19::KnownGene.with_interval(gi).
189
- find(:first,
190
- :include => [:knownToEnsembl => :ensGtp,
191
- :knownToEnsembl => {:kgXref => :refLink}])
192
-
193
- And fields can be referred to as follows:
194
- kg.knownToEnsembl.ensGtp.gene
195
- kg.knownToEnsembl.kgXref.geneSymbol
196
- kg.knownToEnsembl.kgXref.refLink.mrnaAcc
197
-
198
- == Copyright
199
- Copyright:: (c) 2011 MISHIMA, Hiroyuki (missy at be.to / hmishima at nagasaki-u.ac.jp / @mishimahryk in Twitter)
200
- Copyright:: (c) 2010 Jan Aerts
201
-
202
- License:: Ruby license (Ruby's / GPLv2 dual). See COPYING and COPYING.ja for further details.
@@ -1,57 +0,0 @@
1
- #
2
- # = db_connection.rb
3
- #
4
- # Copyright:: Copyright (C) 2011
5
- # MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
6
- # Copyright:: Copyright (C) 2010
7
- # Jan Aerts <jan.aerts@gmail.com>
8
- # License:: Ruby licence (Ruby's / GPLv2 dual)
9
-
10
- require 'rubygems'
11
- require 'active_record'
12
-
13
- module Bio
14
- module Ucsc
15
-
16
- module AilMel1
17
- DATABASE_NAME = "ailMel1"
18
-
19
- # = DESCRIPTION
20
- # Actual connection established to the UCSC mysql server.
21
- class DBConnection < ActiveRecord::Base
22
- # = DESCRIPTION
23
- # The #connect method make the connection
24
- #
25
- # = USAGE
26
- # Bio::Ucsc::DB_NAME::DBConnection.connect
27
- #
28
- # ---
29
- # *Arguments*: none
30
- @@db_adapter ||= 'mysql'
31
- @@db_host ||= 'genome-mysql.cse.ucsc.edu'
32
- @@db_username ||= 'genome'
33
- @@db_password ||= ''
34
-
35
- cattr_accessor :db_adapter, :db_host, :db_username, :db_password
36
-
37
- self.abstract_class = true
38
-
39
- def self.default
40
- @@db_adapter = 'mysql'
41
- @@db_host = 'genome-mysql.cse.ucsc.edu'
42
- @@db_username = 'genome'
43
- @@db_password = ''
44
- end
45
-
46
- def self.connect
47
- establish_connection({ :adapter => @@db_adapter,
48
- :host => @@db_host,
49
- :database => DATABASE_NAME,
50
- :username => @@db_username,
51
- :password => @@db_password, })
52
- end
53
- end
54
- end # module
55
-
56
- end # module Ucsc
57
- end # module Bio
@@ -1,57 +0,0 @@
1
- #
2
- # = db_connection.rb - UCSC DB connection
3
- #
4
- # Copyright:: Copyright (C) 2011
5
- # MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
6
- # Copyright:: Copyright (C) 2010
7
- # Jan Aerts <jan.aerts@gmail.com>
8
- # License:: Ruby licence (Ruby's / GPLv2 dual)
9
-
10
- require 'rubygems'
11
- require 'active_record'
12
-
13
- module Bio
14
- module Ucsc
15
-
16
- module AnoCar2
17
- DATABASE_NAME = "anoCar2"
18
-
19
- # = DESCRIPTION
20
- # Actual connection established to the UCSC mysql server.
21
- class DBConnection < ActiveRecord::Base
22
- # = DESCRIPTION
23
- # The #connect method make the connection
24
- #
25
- # = USAGE
26
- # Bio::Ucsc::DB_NAME::DBConnection.connect
27
- #
28
- # ---
29
- # *Arguments*: none
30
- @@db_adapter ||= 'mysql'
31
- @@db_host ||= 'genome-mysql.cse.ucsc.edu'
32
- @@db_username ||= 'genome'
33
- @@db_password ||= ''
34
-
35
- cattr_accessor :db_adapter, :db_host, :db_username, :db_password
36
-
37
- self.abstract_class = true
38
-
39
- def self.default
40
- @@db_adapter = 'mysql'
41
- @@db_host = 'genome-mysql.cse.ucsc.edu'
42
- @@db_username = 'genome'
43
- @@db_password = ''
44
- end
45
-
46
- def self.connect
47
- establish_connection({ :adapter => @@db_adapter,
48
- :host => @@db_host,
49
- :database => DATABASE_NAME,
50
- :username => @@db_username,
51
- :password => @@db_password, })
52
- end
53
- end
54
- end # module
55
-
56
- end # module Ucsc
57
- end # module Bio
@@ -1,57 +0,0 @@
1
- #
2
- # = db_connection.rb - UCSC DB connection
3
- #
4
- # Copyright:: Copyright (C) 2011
5
- # MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
6
- # Copyright:: Copyright (C) 2010
7
- # Jan Aerts <jan.aerts@gmail.com>
8
- # License:: Ruby licence (Ruby's / GPLv2 dual)
9
-
10
- require 'rubygems'
11
- require 'active_record'
12
-
13
- module Bio
14
- module Ucsc
15
-
16
- module AnoGam1
17
- DATABASE_NAME = "anoGam1"
18
-
19
- # = DESCRIPTION
20
- # Actual connection established to the UCSC mysql server.
21
- class DBConnection < ActiveRecord::Base
22
- # = DESCRIPTION
23
- # The #connect method make the connection
24
- #
25
- # = USAGE
26
- # Bio::Ucsc::DB_NAME::DBConnection.connect
27
- #
28
- # ---
29
- # *Arguments*: none
30
- @@db_adapter ||= 'mysql'
31
- @@db_host ||= 'genome-mysql.cse.ucsc.edu'
32
- @@db_username ||= 'genome'
33
- @@db_password ||= ''
34
-
35
- cattr_accessor :db_adapter, :db_host, :db_username, :db_password
36
-
37
- self.abstract_class = true
38
-
39
- def self.default
40
- @@db_adapter = 'mysql'
41
- @@db_host = 'genome-mysql.cse.ucsc.edu'
42
- @@db_username = 'genome'
43
- @@db_password = ''
44
- end
45
-
46
- def self.connect
47
- establish_connection({ :adapter => @@db_adapter,
48
- :host => @@db_host,
49
- :database => DATABASE_NAME,
50
- :username => @@db_username,
51
- :password => @@db_password, })
52
- end
53
- end
54
- end # module
55
-
56
- end # module Ucsc
57
- end # module Bio
@@ -1,57 +0,0 @@
1
- #
2
- # = db_connection.rb - UCSC DB connection
3
- #
4
- # Copyright:: Copyright (C) 2011
5
- # MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
6
- # Copyright:: Copyright (C) 2010
7
- # Jan Aerts <jan.aerts@gmail.com>
8
- # License:: Ruby licence (Ruby's / GPLv2 dual)
9
-
10
- require 'rubygems'
11
- require 'active_record'
12
-
13
- module Bio
14
- module Ucsc
15
-
16
- module AplCal1
17
- DATABASE_NAME = "aplCal1"
18
-
19
- # = DESCRIPTION
20
- # Actual connection established to the UCSC mysql server.
21
- class DBConnection < ActiveRecord::Base
22
- # = DESCRIPTION
23
- # The #connect method make the connection
24
- #
25
- # = USAGE
26
- # Bio::Ucsc::DB_NAME::DBConnection.connect
27
- #
28
- # ---
29
- # *Arguments*: none
30
- @@db_adapter ||= 'mysql'
31
- @@db_host ||= 'genome-mysql.cse.ucsc.edu'
32
- @@db_username ||= 'genome'
33
- @@db_password ||= ''
34
-
35
- cattr_accessor :db_adapter, :db_host, :db_username, :db_password
36
-
37
- self.abstract_class = true
38
-
39
- def self.default
40
- @@db_adapter = 'mysql'
41
- @@db_host = 'genome-mysql.cse.ucsc.edu'
42
- @@db_username = 'genome'
43
- @@db_password = ''
44
- end
45
-
46
- def self.connect
47
- establish_connection({ :adapter => @@db_adapter,
48
- :host => @@db_host,
49
- :database => DATABASE_NAME,
50
- :username => @@db_username,
51
- :password => @@db_password, })
52
- end
53
- end
54
- end # module
55
-
56
- end # module Ucsc
57
- end # module Bio
@@ -1,57 +0,0 @@
1
- #
2
- # = db_connection.rb
3
- #
4
- # Copyright:: Copyright (C) 2011
5
- # MISHIMA, Hiroyuki <missy at be.to / hmishima at nagasaki-u.ac.jp>
6
- # Copyright:: Copyright (C) 2010
7
- # Jan Aerts <jan.aerts@gmail.com>
8
- # License:: Ruby licence (Ruby's / GPLv2 dual)
9
-
10
- require 'rubygems'
11
- require 'active_record'
12
-
13
- module Bio
14
- module Ucsc
15
-
16
- module BosTau4
17
- DATABASE_NAME = "bosTau4"
18
-
19
- # = DESCRIPTION
20
- # Actual connection established to the UCSC mysql server.
21
- class DBConnection < ActiveRecord::Base
22
- # = DESCRIPTION
23
- # The #connect method make the connection
24
- #
25
- # = USAGE
26
- # Bio::Ucsc::DB_NAME::DBConnection.connect
27
- #
28
- # ---
29
- # *Arguments*: none
30
- @@db_adapter ||= 'mysql'
31
- @@db_host ||= 'genome-mysql.cse.ucsc.edu'
32
- @@db_username ||= 'genome'
33
- @@db_password ||= ''
34
-
35
- cattr_accessor :db_adapter, :db_host, :db_username, :db_password
36
-
37
- self.abstract_class = true
38
-
39
- def self.default
40
- @@db_adapter = 'mysql'
41
- @@db_host = 'genome-mysql.cse.ucsc.edu'
42
- @@db_username = 'genome'
43
- @@db_password = ''
44
- end
45
-
46
- def self.connect
47
- establish_connection({ :adapter => @@db_adapter,
48
- :host => @@db_host,
49
- :database => DATABASE_NAME,
50
- :username => @@db_username,
51
- :password => @@db_password, })
52
- end
53
- end
54
- end # module
55
-
56
- end # module Ucsc
57
- end # module Bio