rbbt-marq 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +20 -0
- data/R/CustomDS.R +80 -0
- data/R/GEO.R +249 -0
- data/R/MA.R +359 -0
- data/README.rdoc +58 -0
- data/bin/marq_config +209 -0
- data/install_scripts/CustomDS/Rakefile +223 -0
- data/install_scripts/GEO/Rakefile +272 -0
- data/install_scripts/GEO/platforms/GPL100.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL1002.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL1007.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL101.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL1010.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL1073.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL1074.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL1090.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL1104.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL118.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL1205.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL1211.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL1213.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL1219.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL1223.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL1226.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL1229.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL1230.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL1231.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL1232.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL1260.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL1261.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL127.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL128.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL1290.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL1292.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL1293.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL1294.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL1295.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL13.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL1310.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL1313.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL1323.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL1331.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL1352.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL1355.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL1382.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL1387.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL1397.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL14.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL1412.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL1415.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL1420.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL144.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL1449.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL1458.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL1523.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL1524.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL1528.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL153.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL1530.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL1535.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL155.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL163.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL168.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL169.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL1704.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL1708.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL1739.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL1740.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL1749.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL177.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL1790.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL1792.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL181.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL1818.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL1820.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL1823.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL1826.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL183.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL1831.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL1833.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL1872.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL1911.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL1914.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL1928.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL1942.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL1945.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL1964.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL198.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL1981.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL200.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL2006.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL201.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL2011.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL2026.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL205.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL207.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL2136.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL220.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL226.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL24.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL246.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL247.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL2507.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL2529.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL2531.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL254.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL2569.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL257.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL2598.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL260.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL2614.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL2622.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL2623.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL2660.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL2670.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL2677.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL2700.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL2721.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL2727.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL273.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL2763.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL2824.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL284.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL287.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL2872.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL288.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL2883.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL289.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL2895.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL2897.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL2902.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL2987.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL2995.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL3039.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL3050.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL3084.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL3113.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL317.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL319.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL32.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL3222.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL3295.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL3305.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL3306.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL3307.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL333.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL3341.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL3349.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL339.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL340.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL3408.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL341.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL3415.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL3423.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL3440.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL3457.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL3504.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL3506.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL355.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL3558.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL3607.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL368.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL3695.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL371.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL3834.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL4006.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL4055.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL409.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL4191.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL4226.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL4371.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL4567.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL4685.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL483.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL49.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL50.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL500.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL507.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL51.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL513.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL519.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL52.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL529.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL53.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL5356.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL538.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL54.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL543.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL544.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL545.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL546.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL547.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL549.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL550.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL56.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL560.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL564.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL57.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL570.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL571.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL576.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL58.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL5823.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL59.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL5915.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL5947.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL61.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL64.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL6419.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL6424.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL65.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL6574.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL6649.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL67.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL6720.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL7054.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL737.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL738.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL74.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL75.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL76.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL764.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL772.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL782.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL783.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL784.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL80.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL81.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL82.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL83.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL85.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL86.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL87.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL870.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL875.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL884.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL887.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL89.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL890.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL891.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL90.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL91.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL92.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL920.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL922.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL924.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL93.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL96.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL968.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL97.yaml +4 -0
- data/install_scripts/GEO/platforms/GPL98.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL981.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL99.yaml +7 -0
- data/install_scripts/GEO/platforms/GPL999.yaml +7 -0
- data/install_scripts/GEO/series/GSE10018.yaml +61 -0
- data/install_scripts/GEO/series/GSE1002.yaml +135 -0
- data/install_scripts/GEO/series/GSE10066.yaml +31 -0
- data/install_scripts/GEO/series/GSE10073.yaml +19 -0
- data/install_scripts/GEO/series/GSE10091.yaml +15 -0
- data/install_scripts/GEO/series/GSE101.yaml +17 -0
- data/install_scripts/GEO/series/GSE10100.yaml +15 -0
- data/install_scripts/GEO/series/GSE10101.yaml +15 -0
- data/install_scripts/GEO/series/GSE10102.yaml +15 -0
- data/install_scripts/GEO/series/GSE10267.yaml +37 -0
- data/install_scripts/GEO/series/GSE10268.yaml +115 -0
- data/install_scripts/GEO/series/GSE10279.yaml +23 -0
- data/install_scripts/GEO/series/GSE103.yaml +19 -0
- data/install_scripts/GEO/series/GSE104.yaml +19 -0
- data/install_scripts/GEO/series/GSE10514.yaml +27 -0
- data/install_scripts/GEO/series/GSE10521.yaml +56 -0
- data/install_scripts/GEO/series/GSE10554.yaml +19 -0
- data/install_scripts/GEO/series/GSE1073.yaml +127 -0
- data/install_scripts/GEO/series/GSE10860.yaml +25 -0
- data/install_scripts/GEO/series/GSE10930.yaml +15 -0
- data/install_scripts/GEO/series/GSE10933.yaml +15 -0
- data/install_scripts/GEO/series/GSE10944.yaml +21 -0
- data/install_scripts/GEO/series/GSE10947.yaml +21 -0
- data/install_scripts/GEO/series/GSE10948.yaml +21 -0
- data/install_scripts/GEO/series/GSE11061.yaml +19 -0
- data/install_scripts/GEO/series/GSE11071.yaml +67 -0
- data/install_scripts/GEO/series/GSE11111.yaml +25 -0
- data/install_scripts/GEO/series/GSE11236.yaml +25 -0
- data/install_scripts/GEO/series/GSE11282.yaml +19 -0
- data/install_scripts/GEO/series/GSE11377.yaml +19 -0
- data/install_scripts/GEO/series/GSE11380.yaml +13 -0
- data/install_scripts/GEO/series/GSE11397.yaml +55 -0
- data/install_scripts/GEO/series/GSE11412.yaml +11 -0
- data/install_scripts/GEO/series/GSE11452.yaml +354 -0
- data/install_scripts/GEO/series/GSE11620.yaml +33 -0
- data/install_scripts/GEO/series/GSE11621.yaml +31 -0
- data/install_scripts/GEO/series/GSE11651.yaml +94 -0
- data/install_scripts/GEO/series/GSE11754.yaml +29 -0
- data/install_scripts/GEO/series/GSE11799.yaml +59 -0
- data/install_scripts/GEO/series/GSE11856.yaml +11 -0
- data/install_scripts/GEO/series/GSE11878.yaml +19 -0
- data/install_scripts/GEO/series/GSE11983.yaml +15 -0
- data/install_scripts/GEO/series/GSE12004.yaml +41 -0
- data/install_scripts/GEO/series/GSE12055.yaml +109 -0
- data/install_scripts/GEO/series/GSE12061.yaml +13 -0
- data/install_scripts/GEO/series/GSE12104.yaml +10 -0
- data/install_scripts/GEO/series/GSE12138.yaml +13 -0
- data/install_scripts/GEO/series/GSE12150.yaml +32 -0
- data/install_scripts/GEO/series/GSE12684.yaml +47 -0
- data/install_scripts/GEO/series/GSE12685.yaml +34 -0
- data/install_scripts/GEO/series/GSE1365.yaml +14 -0
- data/install_scripts/GEO/series/GSE1404.yaml +596 -0
- data/install_scripts/GEO/series/GSE1492.yaml +15 -0
- data/install_scripts/GEO/series/GSE15222.yaml +731 -0
- data/install_scripts/GEO/series/GSE1553.yaml +23 -0
- data/install_scripts/GEO/series/GSE1617.yaml +39 -0
- data/install_scripts/GEO/series/GSE1688.yaml +36 -0
- data/install_scripts/GEO/series/GSE1693.yaml +60 -0
- data/install_scripts/GEO/series/GSE1752.yaml +32 -0
- data/install_scripts/GEO/series/GSE1753.yaml +16 -0
- data/install_scripts/GEO/series/GSE1754.yaml +19 -0
- data/install_scripts/GEO/series/GSE1758.yaml +15 -0
- data/install_scripts/GEO/series/GSE1759.yaml +18 -0
- data/install_scripts/GEO/series/GSE1760.yaml +18 -0
- data/install_scripts/GEO/series/GSE1763.yaml +19 -0
- data/install_scripts/GEO/series/GSE1915.yaml +39 -0
- data/install_scripts/GEO/series/GSE1927.yaml +14 -0
- data/install_scripts/GEO/series/GSE1941.yaml +23 -0
- data/install_scripts/GEO/series/GSE1942.yaml +31 -0
- data/install_scripts/GEO/series/GSE1944.yaml +58 -0
- data/install_scripts/GEO/series/GSE1975.yaml +65 -0
- data/install_scripts/GEO/series/GSE20.yaml +24 -0
- data/install_scripts/GEO/series/GSE2107.yaml +14 -0
- data/install_scripts/GEO/series/GSE2159.yaml +31 -0
- data/install_scripts/GEO/series/GSE2246.yaml +157 -0
- data/install_scripts/GEO/series/GSE2263.yaml +57 -0
- data/install_scripts/GEO/series/GSE2267.yaml +155 -0
- data/install_scripts/GEO/series/GSE23.yaml +58 -0
- data/install_scripts/GEO/series/GSE2329.yaml +43 -0
- data/install_scripts/GEO/series/GSE2330.yaml +55 -0
- data/install_scripts/GEO/series/GSE2349.yaml +19 -0
- data/install_scripts/GEO/series/GSE2412.yaml +58 -0
- data/install_scripts/GEO/series/GSE2419.yaml +27 -0
- data/install_scripts/GEO/series/GSE2420.yaml +29 -0
- data/install_scripts/GEO/series/GSE2434.yaml +37 -0
- data/install_scripts/GEO/series/GSE2526.yaml +23 -0
- data/install_scripts/GEO/series/GSE2579.yaml +19 -0
- data/install_scripts/GEO/series/GSE2806.yaml +11 -0
- data/install_scripts/GEO/series/GSE2831.yaml +35 -0
- data/install_scripts/GEO/series/GSE2832.yaml +17 -0
- data/install_scripts/GEO/series/GSE29.yaml +16 -0
- data/install_scripts/GEO/series/GSE3006.yaml +35 -0
- data/install_scripts/GEO/series/GSE3043.yaml +18 -0
- data/install_scripts/GEO/series/GSE3122.yaml +12 -0
- data/install_scripts/GEO/series/GSE3130.yaml +12 -0
- data/install_scripts/GEO/series/GSE3151.yaml +118 -0
- data/install_scripts/GEO/series/GSE3160.yaml +31 -0
- data/install_scripts/GEO/series/GSE3190.yaml +14 -0
- data/install_scripts/GEO/series/GSE3205.yaml +36 -0
- data/install_scripts/GEO/series/GSE3206.yaml +23 -0
- data/install_scripts/GEO/series/GSE3315.yaml +13 -0
- data/install_scripts/GEO/series/GSE3335.yaml +15 -0
- data/install_scripts/GEO/series/GSE34.yaml +31 -0
- data/install_scripts/GEO/series/GSE3470.yaml +15 -0
- data/install_scripts/GEO/series/GSE35.yaml +80 -0
- data/install_scripts/GEO/series/GSE3503.yaml +19 -0
- data/install_scripts/GEO/series/GSE3683.yaml +83 -0
- data/install_scripts/GEO/series/GSE3684.yaml +19 -0
- data/install_scripts/GEO/series/GSE3685.yaml +31 -0
- data/install_scripts/GEO/series/GSE3686.yaml +63 -0
- data/install_scripts/GEO/series/GSE3687.yaml +83 -0
- data/install_scripts/GEO/series/GSE3802.yaml +19 -0
- data/install_scripts/GEO/series/GSE3803.yaml +19 -0
- data/install_scripts/GEO/series/GSE3804.yaml +19 -0
- data/install_scripts/GEO/series/GSE3805.yaml +19 -0
- data/install_scripts/GEO/series/GSE3813.yaml +9 -0
- data/install_scripts/GEO/series/GSE3814.yaml +71 -0
- data/install_scripts/GEO/series/GSE3815.yaml +75 -0
- data/install_scripts/GEO/series/GSE3817.yaml +13 -0
- data/install_scripts/GEO/series/GSE3818.yaml +15 -0
- data/install_scripts/GEO/series/GSE3819.yaml +13 -0
- data/install_scripts/GEO/series/GSE3820.yaml +61 -0
- data/install_scripts/GEO/series/GSE3821.yaml +55 -0
- data/install_scripts/GEO/series/GSE3844.yaml +11 -0
- data/install_scripts/GEO/series/GSE3853.yaml +31 -0
- data/install_scripts/GEO/series/GSE3935.yaml +12 -0
- data/install_scripts/GEO/series/GSE3969.yaml +14 -0
- data/install_scripts/GEO/series/GSE4049.yaml +80 -0
- data/install_scripts/GEO/series/GSE4144.yaml +9 -0
- data/install_scripts/GEO/series/GSE4261.yaml +59 -0
- data/install_scripts/GEO/series/GSE4295.yaml +63 -0
- data/install_scripts/GEO/series/GSE4398.yaml +50 -0
- data/install_scripts/GEO/series/GSE4719.yaml +76 -0
- data/install_scripts/GEO/series/GSE4720.yaml +78 -0
- data/install_scripts/GEO/series/GSE4721.yaml +14 -0
- data/install_scripts/GEO/series/GSE4807.yaml +67 -0
- data/install_scripts/GEO/series/GSE4826.yaml +39 -0
- data/install_scripts/GEO/series/GSE4934.yaml +23 -0
- data/install_scripts/GEO/series/GSE5027.yaml +31 -0
- data/install_scripts/GEO/series/GSE5070.yaml +25 -0
- data/install_scripts/GEO/series/GSE5238.yaml +31 -0
- data/install_scripts/GEO/series/GSE5267.yaml +55 -0
- data/install_scripts/GEO/series/GSE5281.yaml +492 -0
- data/install_scripts/GEO/series/GSE5290.yaml +24 -0
- data/install_scripts/GEO/series/GSE5376.yaml +107 -0
- data/install_scripts/GEO/series/GSE5575.yaml +13 -0
- data/install_scripts/GEO/series/GSE5835.yaml +25 -0
- data/install_scripts/GEO/series/GSE5836.yaml +37 -0
- data/install_scripts/GEO/series/GSE5837.yaml +37 -0
- data/install_scripts/GEO/series/GSE5938.yaml +187 -0
- data/install_scripts/GEO/series/GSE600.yaml +29 -0
- data/install_scripts/GEO/series/GSE6018.yaml +55 -0
- data/install_scripts/GEO/series/GSE6066.yaml +20 -0
- data/install_scripts/GEO/series/GSE6067.yaml +31 -0
- data/install_scripts/GEO/series/GSE6068.yaml +55 -0
- data/install_scripts/GEO/series/GSE6070.yaml +31 -0
- data/install_scripts/GEO/series/GSE6071.yaml +30 -0
- data/install_scripts/GEO/series/GSE6072.yaml +37 -0
- data/install_scripts/GEO/series/GSE6101.yaml +26 -0
- data/install_scripts/GEO/series/GSE6111.yaml +20 -0
- data/install_scripts/GEO/series/GSE6190.yaml +30 -0
- data/install_scripts/GEO/series/GSE6277.yaml +19 -0
- data/install_scripts/GEO/series/GSE6331.yaml +51 -0
- data/install_scripts/GEO/series/GSE6346.yaml +49 -0
- data/install_scripts/GEO/series/GSE6358.yaml +22 -0
- data/install_scripts/GEO/series/GSE6405.yaml +36 -0
- data/install_scripts/GEO/series/GSE6450.yaml +51 -0
- data/install_scripts/GEO/series/GSE6687.yaml +15 -0
- data/install_scripts/GEO/series/GSE6705.yaml +19 -0
- data/install_scripts/GEO/series/GSE6801.yaml +27 -0
- data/install_scripts/GEO/series/GSE6847.yaml +18 -0
- data/install_scripts/GEO/series/GSE6870.yaml +23 -0
- data/install_scripts/GEO/series/GSE7103.yaml +28 -0
- data/install_scripts/GEO/series/GSE7140.yaml +19 -0
- data/install_scripts/GEO/series/GSE7188.yaml +23 -0
- data/install_scripts/GEO/series/GSE7261.yaml +16 -0
- data/install_scripts/GEO/series/GSE7337.yaml +19 -0
- data/install_scripts/GEO/series/GSE7338.yaml +19 -0
- data/install_scripts/GEO/series/GSE7362.yaml +123 -0
- data/install_scripts/GEO/series/GSE7369.yaml +15 -0
- data/install_scripts/GEO/series/GSE7525.yaml +33 -0
- data/install_scripts/GEO/series/GSE7537.yaml +27 -0
- data/install_scripts/GEO/series/GSE7645.yaml +152 -0
- data/install_scripts/GEO/series/GSE7660.yaml +41 -0
- data/install_scripts/GEO/series/GSE7820.yaml +30 -0
- data/install_scripts/GEO/series/GSE79.yaml +32 -0
- data/install_scripts/GEO/series/GSE8035.yaml +19 -0
- data/install_scripts/GEO/series/GSE8088.yaml +13 -0
- data/install_scripts/GEO/series/GSE8089.yaml +19 -0
- data/install_scripts/GEO/series/GSE8111.yaml +15 -0
- data/install_scripts/GEO/series/GSE8237.yaml +35 -0
- data/install_scripts/GEO/series/GSE8326.yaml +37 -0
- data/install_scripts/GEO/series/GSE8399.yaml +13 -0
- data/install_scripts/GEO/series/GSE850.yaml +15 -0
- data/install_scripts/GEO/series/GSE8506.yaml +32 -0
- data/install_scripts/GEO/series/GSE8542.yaml +47 -0
- data/install_scripts/GEO/series/GSE8558.yaml +19 -0
- data/install_scripts/GEO/series/GSE8559.yaml +47 -0
- data/install_scripts/GEO/series/GSE8613.yaml +19 -0
- data/install_scripts/GEO/series/GSE8629.yaml +18 -0
- data/install_scripts/GEO/series/GSE8729.yaml +19 -0
- data/install_scripts/GEO/series/GSE8761.yaml +55 -0
- data/install_scripts/GEO/series/GSE8765.yaml +15 -0
- data/install_scripts/GEO/series/GSE8805.yaml +64 -0
- data/install_scripts/GEO/series/GSE8825.yaml +79 -0
- data/install_scripts/GEO/series/GSE8895.yaml +31 -0
- data/install_scripts/GEO/series/GSE8897.yaml +17 -0
- data/install_scripts/GEO/series/GSE8898.yaml +18 -0
- data/install_scripts/GEO/series/GSE8900.yaml +43 -0
- data/install_scripts/GEO/series/GSE8982.yaml +106 -0
- data/install_scripts/GEO/series/GSE920.yaml +20 -0
- data/install_scripts/GEO/series/GSE960.yaml +10 -0
- data/install_scripts/GEO/series/GSE961.yaml +14 -0
- data/install_scripts/GEO/series/GSE962.yaml +20 -0
- data/install_scripts/GEO/series/GSE963.yaml +14 -0
- data/install_scripts/GEO/series/GSE964.yaml +14 -0
- data/install_scripts/GEO/series/GSE965.yaml +14 -0
- data/install_scripts/GEO/series/GSE966.yaml +14 -0
- data/install_scripts/GEO/series/GSE993.yaml +9 -0
- data/lib/MARQ.rb +79 -0
- data/lib/MARQ/CustomDS.rb +99 -0
- data/lib/MARQ/GEO.rb +588 -0
- data/lib/MARQ/ID.rb +144 -0
- data/lib/MARQ/MADB.rb +238 -0
- data/lib/MARQ/annotations.rb +740 -0
- data/lib/MARQ/fdr.rb +177 -0
- data/lib/MARQ/main.rb +227 -0
- data/lib/MARQ/rankproduct.rb +146 -0
- data/lib/MARQ/score.rb +395 -0
- data/tasks/install.rake +21 -0
- metadata +588 -0
data/lib/MARQ/ID.rb
ADDED
@@ -0,0 +1,144 @@
|
|
1
|
+
require 'MARQ'
|
2
|
+
require 'DBcache'
|
3
|
+
|
4
|
+
# Translation of gene identifiers between the formats listed in an
# organism's 'identifiers' file. Three strategies are offered: a DBcache
# table (translate_DB, also exposed as translate), an in-memory index
# (translate_index) and an on-the-fly scan of the file (translate_grep).
module ID

  #DEFAULT_FORMAT_ALL = 'Entrez Gene Id'
  DEFAULT_FORMAT_ALL = nil
  DEFAULT_FORMATS = {}

  # Per-organism cache of Organism.supported_ids. Initialised before any
  # method that reads it is defined.
  @@supported = {}

  # Position of the format +id+ among the formats supported for +org+, as
  # computed by Organism.id_position.
  def self.id_position(org, id)
    @@supported[org] ||= Organism.supported_ids(org)
    Organism.id_position(@@supported[org], id)
  end

  # (Re)builds the DBcache table that maps every code found in the
  # identifier file of +org+ (lower-cased) to the code in column +to+ of
  # the same line. An existing table with the same name is dropped first.
  #
  # The file is scanned once per supported format, lowest column first, so
  # the insertion order of the original implementation is preserved. Codes
  # that DBcache refuses to add only produce a message on stdout.
  def self.DB_load(org, to = 0)
    identifier_file = File.join(Rbbt.datadir, 'organisms', org, 'identifiers')
    tablename = "ID_#{org.to_s.strip}_#{to.to_s.strip}"

    DBcache.drop(tablename) if DBcache.has_table?(tablename)
    DBcache.create(tablename, 'CHAR(50)', ['CHAR(50)'])

    total_fields = Organism.supported_ids(org).length

    total_fields.times do |field|
      # File.foreach closes the file after each pass; the previous
      # File.open(...).each left one open handle per field.
      File.foreach(identifier_file) do |l|
        next if l =~ /^#/
        codes = l.chomp.split(/\t/)

        native = codes[to]
        next if native.nil? || native == ""

        other = codes[field]
        next if other.nil? || other == ""

        # A field may hold several alternative codes separated by '|'.
        other.split("|").each do |code|
          begin
            DBcache.fast_add(tablename, code.downcase, [native])
          rescue
            puts $!.message
          end
        end
      end
    end
  end

  # Translates +genes+ through the DBcache table of +org+, building the
  # table first if it does not exist. The target format is options[:to],
  # else DEFAULT_FORMATS[org], else DEFAULT_FORMAT_ALL, else column 0.
  #
  # Returns an array parallel to +genes+; codes that are nil or not found
  # translate to nil.
  def self.translate_DB(org, genes, options = {})
    to = options[:to] || DEFAULT_FORMATS[org] || DEFAULT_FORMAT_ALL
    to = to ? id_position(org, to) : 0

    tablename = "ID_#{org.to_s.strip}_#{to.to_s.strip}"
    DB_load(org, to) unless DBcache.has_table?(tablename)

    genes = genes.collect { |gene| gene.strip.downcase if gene }
    keys  = genes.compact
    # Nothing to look up: skip the query, every position stays nil.
    return genes.collect { nil } if keys.empty?

    DBcache.load(tablename, keys).values_at(*genes).collect { |found| found.first if found }
  end

  # Translates +genes+ using an index built by Organism.id_index and kept
  # in memory per (organism, target format, source format) combination.
  # nil and empty codes translate to nil.
  def self.translate_index(org, genes, options = {})
    genes = genes.collect { |gene| gene.strip if gene }
    to    = options[:to] || DEFAULT_FORMATS[org] || DEFAULT_FORMAT_ALL
    from  = options[:from]

    @indexes ||= {}
    key = org.to_s + to.to_s + from.to_s

    if @indexes[key].nil?
      puts "Loading #{ org }"
      index_options = {}
      index_options[:other]  = [from] if from
      index_options[:native] = to if to
      index_options[:case_sensitive] = false

      @indexes[key] = Organism.id_index(org, index_options)
    end

    index = @indexes[key]

    genes.collect { |code| code.nil? || code == "" ? nil : index[code] }
  end

  # Translates +genes+ by filtering the identifier file of +org+ down to
  # the lines that mention one of the codes and indexing only those lines
  # with Index.index. Matching is case-insensitive; nil codes translate to
  # nil.
  #
  # The filter runs in a child ruby process. Codes are regexp-escaped and
  # the command is started without a shell (array form of IO.popen,
  # Ruby >= 1.9), so quotes or metacharacters in a code can neither break
  # the command nor widen the match.
  def self.translate_grep(org, genes, options = {})
    to   = options[:to] || DEFAULT_FORMATS[org] || DEFAULT_FORMAT_ALL
    from = options[:from]

    index_options = {}
    index_options[:extra]  = [id_position(org, from)] if from
    index_options[:native] = id_position(org, to) if to
    index_options[:case_sensitive] = false

    genes  = genes.collect { |gene| gene.strip.downcase if gene }
    wanted = genes.compact.reject { |gene| gene.empty? }.uniq
    return genes.collect { nil } if wanted.empty?

    #genes_re = '\(^\|[' + "\t" + '|]\)' + genes.join('\|') + '\($\|[' + "\t" + '|]\)'
    #cmd = "cat #{File.join(Rbbt.datadir,'organisms',org,'identifiers')}|grep -i '#{genes_re}'"
    # A code must fill a whole field or a whole '|'-separated alternative.
    genes_re = '(?:^|[\t|])(?:' + wanted.collect { |gene| Regexp.escape(gene) }.join('|') + ')(?:$|[\t|])'
    identifier_file = File.join(Rbbt.datadir, 'organisms', org, 'identifiers')
    filter = 'BEGIN { $re = Regexp.new(ARGV.shift, Regexp::IGNORECASE) }; puts $_ if $_ =~ $re'

    index = Index.index(IO.popen(['ruby', '-lne', filter, genes_re, identifier_file]), index_options)

    index.values_at(*genes)
  end

  # Translates platform probe codes using the annotation file that AILUN
  # publishes for the GEO +platform+; only the first two tab-separated
  # columns of each line are kept. nil codes translate to nil.
  def self.AILUN_translate(platform, genes)
    fix = proc { |l|
      m = l.match(/^(.*?)\t(.*?)\t.*/)
      m[1].downcase + "\t" + m[2]
    }
    index = Open.to_hash("ftp://ailun.stanford.edu/ailun/annotation/geo/#{platform}.annot.gz", :fix => fix)

    codes = genes.collect { |code| code.strip.downcase if code }
    index.values_at(*codes).collect { |v| v.nil? ? nil : v.first.first }
  end

  class << self
    # Default translation strategy.
    alias_method :translate, :translate_DB
  end

end
|
124
|
+
|
125
|
+
if __FILE__ == $0
  require 'benchmark'

  # When run as a script, rebuild the identifier translation table of
  # every organism reported by Organism.all.
  Organism.all.each do |org|
    ID.DB_load(org)
  end

  # Ad-hoc benchmark of ID.translate, kept disabled for reference:
  #
  #num = 4000
  #genes = File.open(Rbbt.datadir + '/organisms/human/identifiers').collect{|l| l.split("\t")[2] }.select{|c| c != ""}[1000,num]
  #trans = nil
  #puts Benchmark.measure{
  #  trans = ID.translate('human', genes)
  #}
  #p trans
end
|
data/lib/MARQ/MADB.rb
ADDED
@@ -0,0 +1,238 @@
|
|
1
|
+
require 'MARQ'
|
2
|
+
require 'MARQ/GEO'
|
3
|
+
require 'MARQ/CustomDS'
|
4
|
+
|
5
|
+
module MADB
|
6
|
+
module CustomDS
|
7
|
+
|
8
|
+
# Stores the custom +dataset+ in DBcache and, when
# Object::CustomDS.has_cross_platform? says one exists, its
# '_cross_platform' variant as well. Always returns nil.
def self.save(dataset)
  save_dataset_tables(dataset)

  return unless Object::CustomDS::has_cross_platform?(dataset)

  save_dataset_tables(dataset + '_cross_platform')
  nil
end

# Internal helper for save. Reads the '.codes', '.experiments' and
# '.orders' files found under Object::CustomDS.path(name) and stores them
# as three DBcache entries: name + '_codes', name + '_experiments' and
# name itself (lower-cased code => row of orders, "NA" stored as nil).
#
# All three files are read, and closed, before anything is saved, so a
# missing file no longer leaves a half-stored dataset behind.
def self.save_dataset_tables(name)
  prefix = Object::CustomDS.path(name)

  codes       = File.readlines(prefix + '.codes').collect { |l| l.chomp.downcase }
  experiments = File.readlines(prefix + '.experiments').collect { |l| l.chomp }
  orders      = File.readlines(prefix + '.orders').collect { |l|
    l.chomp.split(/\t/).collect { |v| v == "NA" ? nil : v.to_i }
  }

  data = {}
  codes.each_with_index { |code, i| data[code.to_sym] = orders[i] }

  type = rank_column_type(codes.length)

  DBcache.save(name + '_codes', codes)
  DBcache.save(name + '_experiments', experiments)
  DBcache.save(name, data, [type] * orders.first.length)
end

# Internal helper for save. Name of the smallest unsigned integer column
# type used for a dataset with +entries+ codes. The middle type used to be
# misspelled "MEDIUMIN UNSIGNED", which is not a valid type name
# (presumably rejected by the database behind DBcache -- confirm).
def self.rank_column_type(entries)
  if entries < 65535
    "SMALLINT UNSIGNED"
  elsif entries < 16777215
    "MEDIUMINT UNSIGNED"
  else
    "INT UNSIGNED"
  end
end
|
62
|
+
|
63
|
+
def self.positions(dataset, genes)
|
64
|
+
return [{},[]] if genes.empty?
|
65
|
+
genes = genes.collect{|gene| gene.downcase.strip}
|
66
|
+
|
67
|
+
platform_entries = platform_entries(dataset + '_codes').to_f
|
68
|
+
|
69
|
+
data = {}
|
70
|
+
matched = []
|
71
|
+
|
72
|
+
gene_positions = DBcache.load(dataset, genes)
|
73
|
+
matched ||= gene_positions.keys
|
74
|
+
|
75
|
+
experiments = DBcache.load(dataset + '_experiments').sort{|a,b|
|
76
|
+
a[0].to_i <=> b[0].to_i
|
77
|
+
}.collect{|p|
|
78
|
+
Object::GEO::clean(dataset) + ": " + p[1].first
|
79
|
+
}
|
80
|
+
|
81
|
+
|
82
|
+
matched = (matched + gene_positions.keys).uniq
|
83
|
+
scale = (0..experiments.length - 1).collect{|i|
|
84
|
+
rows = DBcache.num_rows(dataset, "C#{i}");
|
85
|
+
if rows > 0
|
86
|
+
platform_entries / rows
|
87
|
+
else
|
88
|
+
nil
|
89
|
+
end
|
90
|
+
}
|
91
|
+
|
92
|
+
gene_x_experiment = gene_positions.values
|
93
|
+
|
94
|
+
experiment_x_gene = gene_x_experiment.transpose
|
95
|
+
|
96
|
+
experiments.each_with_index{|experiment, i|
|
97
|
+
next if scale[i].nil? || experiment_x_gene[i].nil?
|
98
|
+
values = experiment_x_gene[i].collect{|v| v.nil? ? nil : (v.to_f * scale[i]).to_i}
|
99
|
+
data[experiment] = values
|
100
|
+
}
|
101
|
+
|
102
|
+
[data, matched]
|
103
|
+
end
|
104
|
+
|
105
|
+
def self.platform_entries(platform)
|
106
|
+
DBcache.num_rows(platform)
|
107
|
+
end
|
108
|
+
end
|
109
|
+
|
110
|
+
|
111
|
+
|
112
|
+
|
113
|
+
|
114
|
+
module GEO
|
115
|
+
|
116
|
+
def self.saveGPL(platform)
|
117
|
+
datasets = Object::GEO.platform_datasets(platform).sort
|
118
|
+
return if datasets.empty?
|
119
|
+
|
120
|
+
|
121
|
+
codes = File.open(File.join(Object::GEO.platform_path(platform),'codes')).collect{|l| l.chomp.downcase}
|
122
|
+
|
123
|
+
DBcache.save(platform, codes)
|
124
|
+
|
125
|
+
datasets.sort.each{|dataset|
|
126
|
+
path = Object::GEO.dataset_path(dataset)
|
127
|
+
experiments = File.open(path + '.experiments').collect{|l| l.chomp}
|
128
|
+
orders = File.open(path + '.orders').collect{|l| values = l.chomp.split(/\t/).collect{|v| v == "NA" ? nil : v.to_i };}
|
129
|
+
|
130
|
+
data = {}
|
131
|
+
codes.each_with_index{|code,i|
|
132
|
+
data[code.to_sym] = orders[i]
|
133
|
+
}
|
134
|
+
case
|
135
|
+
when codes.length < 65535
|
136
|
+
type = "SMALLINT UNSIGNED"
|
137
|
+
when codes.length < 16777215
|
138
|
+
type = "MEDIUMINT UNSIGNED"
|
139
|
+
else
|
140
|
+
type = "INT UNSIGNED"
|
141
|
+
end
|
142
|
+
|
143
|
+
DBcache.save(dataset + '_experiments', experiments)
|
144
|
+
DBcache.save(dataset, data, [type] * orders.first.length)
|
145
|
+
}
|
146
|
+
|
147
|
+
|
148
|
+
return unless File.exist?(File.join(Object::GEO.platform_path(platform),'cross_platform'))
|
149
|
+
codes = File.open(File.join(Object::GEO.platform_path(platform),'cross_platform')).collect{|l| l.chomp.downcase}
|
150
|
+
|
151
|
+
DBcache.save(platform + '_cross_platform', codes)
|
152
|
+
|
153
|
+
Progress.monitor("Saving #{ platform }")
|
154
|
+
datasets.sort.each{|dataset|
|
155
|
+
path = Object::GEO.dataset_path(dataset)
|
156
|
+
next unless File.exists?(path + '_cross_platform.experiments')
|
157
|
+
experiments = File.open(path + '_cross_platform.experiments').collect{|l| l.chomp}
|
158
|
+
orders = File.open(path + '_cross_platform.orders').collect{|l| values = l.chomp.split(/\t/).collect{|v| v == "NA" ? nil : v.to_i };}
|
159
|
+
|
160
|
+
data = {}
|
161
|
+
codes.each_with_index{|code,i|
|
162
|
+
data[code.to_sym] = orders[i]
|
163
|
+
}
|
164
|
+
|
165
|
+
case
|
166
|
+
when codes.length < 65535
|
167
|
+
type = "SMALLINT UNSIGNED"
|
168
|
+
when codes.length < 16777215
|
169
|
+
type = "MEDIUMIN UNSIGNED"
|
170
|
+
else
|
171
|
+
type = "INT UNSIGNED"
|
172
|
+
end
|
173
|
+
|
174
|
+
|
175
|
+
DBcache.save(dataset + '_cross_platform_experiments', experiments)
|
176
|
+
DBcache.save(dataset + '_cross_platform', data, [type] * orders.first.length)
|
177
|
+
}
|
178
|
+
end
|
179
|
+
|
180
|
+
def self.positions(platform, genes)
|
181
|
+
return [{},[]] if genes.empty?
|
182
|
+
genes = genes.collect{|gene| gene.downcase.strip}
|
183
|
+
|
184
|
+
datasets = Object::GEO.platform_datasets(platform).sort
|
185
|
+
platform_entries = platform_entries(platform).to_f
|
186
|
+
|
187
|
+
data = {}
|
188
|
+
matched = nil
|
189
|
+
|
190
|
+
datasets.each{|dataset|
|
191
|
+
dataset += '_cross_platform' if Object::GEO::is_cross_platform?(platform)
|
192
|
+
gene_positions = DBcache.load(dataset, genes)
|
193
|
+
matched ||= gene_positions.keys
|
194
|
+
|
195
|
+
experiments = DBcache.load(dataset + '_experiments').sort{|a,b|
|
196
|
+
a[0].to_i <=> b[0].to_i
|
197
|
+
}.collect{|p|
|
198
|
+
Object::GEO::clean(dataset) + ": " + p[1].first
|
199
|
+
}
|
200
|
+
|
201
|
+
scale = (0..experiments.length - 1).collect{|i|
|
202
|
+
rows = DBcache.num_rows(dataset, "C#{i}");
|
203
|
+
if rows > 0
|
204
|
+
platform_entries / rows
|
205
|
+
else
|
206
|
+
nil
|
207
|
+
end
|
208
|
+
}
|
209
|
+
|
210
|
+
gene_x_experiment = gene_positions.values
|
211
|
+
|
212
|
+
experiment_x_gene = gene_x_experiment.transpose
|
213
|
+
|
214
|
+
experiments.each_with_index{|experiment, i|
|
215
|
+
next if scale[i].nil? || experiment_x_gene[i].nil?
|
216
|
+
values = experiment_x_gene[i].collect{|v| v.nil? ? nil : (v.to_f * scale[i]).to_i}
|
217
|
+
data[experiment] = values
|
218
|
+
}
|
219
|
+
}
|
220
|
+
|
221
|
+
[data, matched]
|
222
|
+
end
|
223
|
+
|
224
|
+
def self.platform_entries(platform)
|
225
|
+
DBcache.num_rows(platform)
|
226
|
+
end
|
227
|
+
end
|
228
|
+
|
229
|
+
end
|
230
|
+
|
231
|
+
# When this file is run directly, print the GDS989 entries found for one gene
# in the GPL91 cross platform tables.
if __FILE__ == $0
  #CustomDS::datasets('sgd').each{|d| MADB::CustomDS::save(d)}

  require 'pp'
  data = MADB::GEO::positions('GPL91_cross_platform', %w(2778))[0]
  pp data.select { |k, v| k =~ /GDS989/ }.sort
  #p MADB::CustomDS::positions("HaploidData",%w( YMR261c YDL140c YIL122w YPL093w YHR211w YDL142c YHR106w YOR103c YDR233c YLR181c yomeman))
  #p MADB::CustomDS::positions("HaploidData_cross_platform",%w( S000002685 S000001149 S000003068 S000003153 S000003355 S000000127 S000004444 S000004875 S000001702 S000005843 S000000862))
end
|
@@ -0,0 +1,740 @@
|
|
1
|
+
require 'inline'
|
2
|
+
require 'net/http'
|
3
|
+
require 'uri'
|
4
|
+
require 'MARQ'
|
5
|
+
require 'rbbt/bow/dictionary'
|
6
|
+
require 'MARQ/fdr'
|
7
|
+
|
8
|
+
module Annotations
|
9
|
+
# C implementation of the cumulative hypergeometric probability, compiled
# through the `inline` builder. lFactorial and lBinom are helper C functions
# used by hypergeometric, which the rest of this file calls as
# Annotations.hypergeometric(total, support, list, found).
class << self
  inline do |builder|
    builder.c_raw <<-EOC

/**
 * Compute log(k!)
 * @param k The value k.
 * @return The result.
 */
double lFactorial(double k)
{
  double r = 0;
  int i;
  for(i=2 ; i<=(int)k ; i++)
  {
    r = r + (double)(log((double)i));
  }
  return r;
}



/**
 * Compute the log(binom(n,k))
 * @param n The number of possible items.
 * @param k The number of selected items.
 * @return The result.
 */
double lBinom(double n, double k)
{
  long i;
  double r = 0;

  /* binom(n,k) == binom(n,n-k): work with the smaller of k and n-k.
   * (The previous test, n > n-k, held for every k > 0 and therefore
   * always switched to n-k, even when that was the larger side.) */
  if(k > n-k){
    k = n-k;
  }

  for(i = (long)n ; i> (n-k) ; i--)
  {
    r = r + log((double)i);
  }

  r = r - lFactorial(k);

  return r;
}
    EOC

    builder.c <<-EOC
/**
 * * Compute the Hypergeometric accumulated value.
 * * @param total => total size
 * * @param support => total support
 * * @param list => selected list size,
 * * @param found => support
 * * @return The result
 * */
double hypergeometric(double total, double support, double list, double found)
{
  double other = total - support;

  double top = list;
  if(support < list){
    top = support;
  }

  double log_n_choose_k = lBinom(total,list);

  double lfoo = lBinom(support,top) + lBinom(other, list-top);

  double sum = 0;
  int i;
  for (i = (int)top; i >= found; i-- )
  {
    sum = sum + exp(lfoo - log_n_choose_k);
    if ( i > found)
    {
      lfoo = lfoo + log(i / (support - i+1)) +  log( (other - list + i) / (list-i+1)  );
    }
  }
  return sum;
}
    EOC
  end
end
|
94
|
+
|
95
|
+
# Dataset part of an experiment label: the text preceding the first colon.
# Returns nil when the label contains no colon.
def self.exp2gds(experiment)
  label = /(.*?):/.match(experiment)
  label && label[1]
end
|
99
|
+
|
100
|
+
# Ordering used to rank results: ascending :pvalue, ties broken by
# descending absolute :score. Returns -1, 0 or 1 (nil when the p-values
# cannot be ordered at all).
def self.compare(a, b)
  if a[:pvalue] < b[:pvalue]
    -1
  elsif a[:pvalue] > b[:pvalue]
    1
  elsif a[:pvalue] == b[:pvalue]
    b[:score].abs <=> a[:score].abs
  end
end
|
110
|
+
|
111
|
+
# Bin limits used to group terms by their number of hits before computing
# p-values; a term goes to the largest bin not exceeding its hit count.
RANK_SIZE_BINS = %w(1 2 3 4 5 7 10 15 20 30 40 50 65 80 100 125 150 175 200 250 300 350 400 450 500 600 700 800 900 1000 1500 2000 2500 3000)

# Rank based enrichment of annotation terms.
#
# annotations:: {experiment => {:dataset => [terms], :signature => [terms]}}
# ranks::       experiments, best first
# options::     :dict_options is handed to Dictionary::TF_IDF#best to choose
#               the terms that are evaluated
#
# Every experiment contributes its signature terms at its rank; dataset terms
# are only contributed by the first ranked experiment of each dataset. Terms
# are scored with Score::score over their rank positions and receive p-values
# from Score::pvalues computed per size bin. Terms with a single hit (bin 1)
# and terms with a negative score are not reported.
#
# Returns {term => {:score, :hits, :pvalue}}.
def self.enrichment_rank(annotations, ranks, options = {})
  dict_options = {
    :dict_options => {:low => 0, :hi => 0.5, :limit => 100000}
  }.merge(options)[:dict_options]

  # One dictionary document per ranked experiment
  dict = Dictionary::TF_IDF.new
  ranks.each do |experiment|
    info = annotations[experiment]

    term_count = {}
    (info[:signature] + info[:dataset]).each do |term|
      term_count[term] ||= 0
      term_count[term] += 1
    end
    dict.add(term_count)
  end

  best = dict.best(dict_options).keys

  # Hash lookup instead of scanning `best` for every term
  selected = {}
  best.each { |term| selected[term] = true }

  positions = {}
  found_datasets = []
  ranks.each_with_index do |experiment, rank|
    info = annotations[experiment]
    dataset = exp2gds experiment

    terms = info[:signature]
    if ! found_datasets.include? dataset
      terms += info[:dataset]
      found_datasets << dataset
    end

    terms.uniq.each do |term|
      next unless selected.key? term
      positions[term] ||= []
      positions[term] << rank
    end
  end

  sizes = {}
  RANK_SIZE_BINS.each { |size| sizes[size.to_i] = [] }
  descending_bins = sizes.keys.sort.reverse

  # For each term compute the rank score. Also, place it in the closest size
  # bin for the permutations.
  scores = []
  best.each_with_index do |term, pos|
    list = positions[term]
    if list
      bin = descending_bins.find { |size| size <= list.length } || descending_bins.first
      sizes[bin] << pos

      scores << Score::score(list, ranks.length, 0)[:score]
    else # it has no score
      scores << nil
    end
  end

  info = {}

  # Go through all the size bins, run the permutations and assign the pvalues
  # to all terms in the bin.
  sizes.each do |size, members|
    next if size == 1
    next if members.empty?

    # These are the actual scores for the terms in the bin
    sub_list_scores = members.collect { |pos| scores[pos] || 0 }

    # Compute the pvalues for all the terms in the bin. The size of the
    # permutation list is that of the bin
    pvalues = Score::pvalues(sub_list_scores, size, 0, ranks.length)

    # Save the information from the terms, score, hits, and pvalues.
    members.zip(pvalues).each do |pos, pvalue|
      score = scores[pos]
      # NOTE(review): assumes Score::score never yields a nil :score, although
      # the `|| 0` above guards against it -- confirm.
      next if score < 0

      term = best[pos]
      hits = positions[term].nil? ? 0 : positions[term].length

      info[term] = {:score => score, :hits => hits, :pvalue => pvalue}
    end
  end

  info
end
|
226
|
+
|
227
|
+
# Hypergeometric enrichment of annotation terms.
#
# annotations:: {experiment => {:dataset => [terms], :signature => [terms]}}
# relevant::    experiments considered significant
# options::     :dict_options is handed to Dictionary::TF_IDF#best to choose
#               the terms that are evaluated
#
# For every selected term, counts the experiments annotated with it (:total)
# and how many of those are relevant (:relevant). Signature terms count for
# every experiment, dataset terms only for the first experiment seen of each
# dataset. The p-value comes from Annotations.hypergeometric.
#
# Returns {term => {:relevant, :total, :pvalue}}.
def self.enrichment_hypergeometric(annotations, relevant, options = {})
  dict_options = {
    :dict_options => {:low => 0, :hi => 0.5, :limit => 100000}
  }.merge(options)[:dict_options]

  # One dictionary document per annotated experiment. (This loop used to
  # iterate over `ranks`, a variable that does not exist in this method.)
  dict = Dictionary::TF_IDF.new
  annotations.each do |experiment, info|
    term_count = {}
    (info[:signature] + info[:dataset]).each do |term|
      term_count[term] ||= 0
      term_count[term] += 1
    end
    dict.add(term_count)
  end

  best = dict.best(dict_options).keys

  # Hash lookup instead of scanning `best` for every term
  selected = {}
  best.each { |term| selected[term] = true }

  terms = {}
  count_terms = lambda do |list, experiment|
    list.each do |term|
      next unless selected.key? term
      terms[term] ||= {:relevant => 0, :total => 0}
      terms[term][:total] += 1
      terms[term][:relevant] += 1 if relevant.include? experiment
    end
  end

  found_datasets = []
  annotations.each do |experiment, info|
    dataset = exp2gds experiment

    count_terms.call(info[:signature], experiment)

    next if found_datasets.include? dataset
    found_datasets << dataset

    count_terms.call(info[:dataset], experiment)
  end

  total = annotations.keys.length
  list = relevant.length

  terms.each do |term, info|
    info[:pvalue] = Annotations.hypergeometric(total, info[:total], list, info[:relevant])
  end

  terms
end
|
292
|
+
|
293
|
+
|
294
|
+
|
295
|
+
# Gather the annotation terms of a set of scored experiments and compute the
# enrichment of those terms.
#
# scores::    {"<dataset>: <signature>" => {:score => ..., :pvalue => ...}}
# type::      annotation type; a `_direct` or `_inverse` suffix selects the
#             `<type>_up` / `<type>_down` annotation files according to the
#             sign of each experiment score
# pvalue::    experiments with a p-value up to this one are deemed relevant
#             (only used by the hypergeometric algorithm)
# algorithm:: :rank uses enrichment_rank, anything else
#             enrichment_hypergeometric
#
# Annotation files are YAML files found under
# MARQ.datadir/<platform type>/annotations/<type>/<dataset>; experiments
# whose file is missing get empty annotations.
#
# Returns [ {experiment => dataset terms + signature terms}, enrichment ].
def self.annotations(scores, type, pvalue = 0.05, algorithm = :rank)
  annot = {}
  relevant = []

  # NOTE(review): this test runs before the _direct/_inverse suffix is
  # removed, so "Words_direct" gets the generic settings -- confirm intended.
  dict_options = if type == "Words"
                   {:low => 0, :hi => 0.05, :limit => 100000}
                 else
                   {:low => 0, :hi => 0.5, :limit => 100000}
                 end

  side = nil
  case
  when type =~ /^(.*)_direct$/
    side = :direct
    type = $1
  when type =~ /^(.*)_inverse$/
    side = :inverse
    type = $1
  end

  terms_cache = {}
  scores.each do |experiment, info|
    label = experiment.match(/^(.*?): /)
    dataset = label[1]
    name = label.post_match.strip

    subdir = if side.nil?
               type
             elsif (side == :direct && info[:score] > 0) || (side == :inverse && info[:score] < 0)
               type + '_up'
             else
               type + '_down'
             end
    term_file = File.join(MARQ.datadir, MARQ.platform_type(dataset).to_s , 'annotations', subdir, dataset)

    if File.exist? term_file
      # Block form so the file is closed once parsed
      terms_cache[term_file] ||= File.open(term_file) { |file| YAML::load(file) }
      terms = terms_cache[term_file]
      annot[experiment] = {:dataset => (terms[:dataset] || []), :signature => (terms[name] || [])}
    else
      annot[experiment] = {:dataset => [], :signature => []}
    end

    relevant << experiment if info[:pvalue] <= pvalue
  end

  # Both enrichment methods read their dictionary settings from the
  # :dict_options key; passing the settings hash itself made them fall back
  # to their defaults.
  enrichment_options = {:dict_options => dict_options}
  if algorithm == :rank
    ranks = scores.sort { |a, b| compare(a[1], b[1]) }.collect { |pair| pair[0] }
    terms = enrichment_rank(annot, ranks, enrichment_options)
  else
    terms = enrichment_hypergeometric(annot, relevant, enrichment_options)
  end

  merged_annotations = {}
  annot.each do |key, info|
    merged_annotations[key] = info[:dataset] + info[:signature]
  end

  [merged_annotations, terms]
end
|
353
|
+
|
354
|
+
module GO
|
355
|
+
# GO enrichment through Genecodis, either with the remote web service (WS)
# or with a local installation (Local).
module Genecodis
  # Organism codes understood by Genecodis, keyed by the names used here.
  ORGS = {
    'sgd' => 'Sc' ,
    'rgd' => 'Rn' ,
    'mgi' => 'Mm' ,
    'pombe' => 'Sp' ,
    'cgd' => 'Ca' ,
    'human' => 'Hs' ,
    'tair' => 'At' ,
    'worm' => 'Ce' ,
  }

  # Keys given to the tab separated columns of a result line.
  FIELDS = %w(Id Items S TS Hyp Hyp_c Genes).collect{|f| f.downcase.to_sym}

  # Turn the lines of a result into hashes keyed by FIELDS. The first line is
  # dropped (presumably a header -- the original discarded it as well).
  def self.parse_results(lines)
    rows = lines.collect { |line| line.chomp }
    rows.shift
    rows.collect { |row| Hash[FIELDS.zip(row.split(/\t/))] }
  end

  module WS
    # Build a SOAP RPC driver from the Genecodis WSDL.
    def self.driver
      require 'soap/wsdlDriver'
      wsdl_url = 'http://genecodis.dacya.ucm.es/static/wsdl/genecodisWS.wsdl'
      SOAP::WSDLDriverFactory.new(wsdl_url).create_rpc_driver
    end

    # Run a GO_Biological_Process analysis of `list` for organism `org`.
    #
    # Returns an array of hashes keyed by FIELDS; [] when the organism is
    # unknown or the job ends with a negative status. Any error is printed
    # and swallowed, in which case the result is nil.
    def self.analysis(org, list)
      puts "GO for #{ org } #{list.length} genes"

      gc_org = ORGS[org.to_s]
      return [] if gc_org.nil?

      # Build the driver once; it used to be rebuilt for every call,
      # including every status poll.
      ws = driver

      job_id = ws.analyze(gc_org,2,0,-1,3,list,%w(GO_Biological_Process ),[])

      # Status 1 is treated as "still running"
      while (stat = ws.status(job_id)) == 1
        sleep 1
      end

      if stat < 0
        return []
      else
        Genecodis.parse_results(ws.results(job_id))
      end
    rescue
      puts $!.message
      puts $!.backtrace
    end
  end

  module Local

    # Same as WS.analysis but using the top level Genecodis module loaded
    # from 'genecodis'; `slim` switches the annotation group to
    # GOSlim_Process.
    #
    # Returns an array of hashes keyed by FIELDS; [] when the organism is
    # unknown, the job cannot be created, ends with a negative status or has
    # no results. Any error is printed and swallowed, in which case the
    # result is nil.
    def self.analysis(org,list, slim = false)
      require 'genecodis'

      gc_org = ORGS[org.to_s]
      return [] if gc_org.nil?

      groups = slim ? ['GOSlim_Process'] : ['GO_Biological_Process']

      job_id = Object::Genecodis.analyze(gc_org,2,0,-1,3,list,groups,nil)
      return [] if job_id.nil?

      # Status 1 is treated as "still running"
      while (stat = Object::Genecodis.status(job_id)) == 1
        sleep 0.5
      end

      if stat < 0
        return []
      else
        res = Object::Genecodis.results(job_id)
        return [] if res.nil?
        Annotations::GO::Genecodis.parse_results(res)
      end
    rescue
      puts $!.message
      puts $!.backtrace
    end
  end
end
|
437
|
+
|
438
|
+
# Select, for every experiment of a dataset, the genes at both ends of the
# ranking stored in the dataset's `.orders` file.
#
# dataset::   dataset name, turned into a path prefix by MARQ.dataset_path
# num_genes:: cut applied at each end of the ranking
#
# A gene is listed as :up for an experiment when its rank is lower than
# num_genes and as :down when its rank is greater than the number of genes
# minus num_genes. "NA" ranks are ignored.
#
# Returns {:up => {experiment => [genes]}, :down => {experiment => [genes]}}.
def self.get_genes_nth(dataset, num_genes)
  path = MARQ.dataset_path(dataset)

  # readlines/foreach close the files once they are read
  experiments = File.readlines(path + '.experiments').collect { |line| line.chomp.strip }
  genes = File.readlines(path + '.codes').collect { |line| line.chomp.strip }
  total_genes = genes.length

  genes_up = {}
  genes_down = {}
  experiments.each { |exp| genes_up[exp] = []; genes_down[exp] = [] }

  File.foreach(path + '.orders').each_with_index do |line, i|
    values = line.chomp.split(/\t/)
    experiments.zip(values).each do |name, value|
      next if value == "NA"
      rank = value.to_i
      genes_up[name] << genes[i] if rank < num_genes
      genes_down[name] << genes[i] if rank > total_genes - num_genes
    end
  end

  {:up => genes_up, :down => genes_down}
end
|
462
|
+
|
463
|
+
# Select the up and down regulated genes of every experiment of a dataset.
#
# dataset:: dataset name, turned into a path prefix by MARQ.dataset_path
# options::
#   :nth_genes:: when greater than 0, delegate to get_genes_nth (default 0)
#   :cut_off::   p-value threshold (default 0.05)
#   :fdr::       derive the threshold from FDR.step_up instead of using
#                :cut_off directly (default false)
#   :folds::     threshold for `[ratio]` experiments (default 2.5)
#   :do_folds::  whether `[ratio]` experiments are processed (default true)
#
# Experiments whose name contains "[ratio]" are read from `.logratios`, the
# rest from `.pvalues`. In `.pvalues` the sign of a value tells the direction
# and its magnitude is the p-value; a gene is selected when that p-value is
# below the threshold. In `.logratios` a gene is selected when the magnitude
# of its value in the given direction exceeds :folds.
#
# Returns {:up => {experiment => [genes]}, :down => {experiment => [genes]}}.
def self.get_genes(dataset, options = {})
  fdr, cut_off, folds, do_folds, nth_genes = {
    :fdr => false,
    :cut_off => 0.05,
    :folds => 2.5,
    :do_folds => true,
    :nth_genes => 0,
  }.merge(options).values_at(:fdr, :cut_off, :folds, :do_folds, :nth_genes)

  return get_genes_nth(dataset, nth_genes) if nth_genes > 0

  path = MARQ.dataset_path(dataset)

  # readlines/foreach close the files once they are read
  experiments = File.readlines(path + '.experiments').collect { |line| line.chomp.strip }
  genes = File.readlines(path + '.codes').collect { |line| line.chomp.strip }

  experiments_ts = experiments.select { |exp| exp !~ /\[ratio\]/ }
  experiments_fold = do_folds ? experiments.select { |exp| exp =~ /\[ratio\]/ } : []

  values_up = {}
  values_down = {}
  experiments.each { |exp| values_up[exp] = []; values_down[exp] = [] }

  # 1.0 stands for "not significant in this direction"
  if File.exist?(path + '.pvalues')
    File.foreach(path + '.pvalues') do |line|
      values = line.chomp.split(/\t/)
      experiments_ts.zip(values).each do |name, raw|
        value = raw == "NA" ? 1.0 : raw.to_f
        values_up[name] << (value > 0 ? value : 1.0)
        values_down[name] << (value < 0 ? - value : 1.0)
      end
    end
  end

  # 0 stands for "no change in this direction"
  unless experiments_fold.empty?
    File.foreach(path + '.logratios') do |line|
      values = line.chomp.split(/\t/)
      experiments.zip(values).each do |name, raw|
        next unless experiments_fold.include? name
        value = raw == "NA" ? 0 : raw.to_f
        # Compare the parsed number; the raw field is a String
        values_up[name] << (value > 0 ? value : 0)
        values_down[name] << (value < 0 ? - value : 0)
      end
    end
  end

  select_genes = lambda do |values_by_experiment|
    selected = {}
    values_by_experiment.each do |experiment, values|
      selected[experiment] = []
      if experiments_ts.include? experiment
        threshold = cut_off
        if fdr
          threshold = FDR.step_up(values.sort, cut_off)
          next if threshold == 0.0
        end
        values.each_with_index { |value, i| selected[experiment] << genes[i] if value < threshold }
      elsif experiments_fold.include? experiment
        # Ratios must exceed the fold threshold; the 0 placeholders never do
        values.each_with_index { |value, i| selected[experiment] << genes[i] if value > folds }
      end
    end
    selected
  end

  {:up => select_genes.call(values_up), :down => select_genes.call(values_down)}
end
|
545
|
+
|
546
|
+
# Earlier version of get_genes that only looks at p-values.
#
# dataset:: dataset name, turned into a path prefix by MARQ.dataset_path
# cut_off:: p-value threshold
# fdr::     derive the threshold from FDR.step_up instead of using cut_off
#           directly
#
# Experiments whose name contains "[ratio]" are left out. In `.pvalues` the
# sign of a value tells the direction and its magnitude is the p-value; a
# gene is selected when that p-value is below the threshold. A missing
# `.pvalues` file yields empty selections.
#
# Returns {:up => {experiment => [genes]}, :down => {experiment => [genes]}}.
def self.get_genes_old(dataset, cut_off = 0.1, fdr = false)
  path = MARQ.dataset_path(dataset)

  # readlines/foreach close the files once they are read
  experiments = File.readlines(path + '.experiments').
    collect { |line| line.chomp.strip }.
    select { |name| !name.match(/\[ratio\]/) }
  genes = File.readlines(path + '.codes').collect { |line| line.chomp.strip }

  values_up = {}
  values_down = {}
  experiments.each { |exp| values_up[exp] = []; values_down[exp] = [] }

  # 1.0 stands for "not significant in this direction"
  if File.exist?(path + '.pvalues')
    File.foreach(path + '.pvalues') do |line|
      values = line.chomp.split(/\t/)
      experiments.zip(values).each do |name, raw|
        value = raw == "NA" ? 1.0 : raw.to_f
        values_up[name] << (value > 0 ? value : 1.0)
        values_down[name] << (value < 0 ? - value : 1.0)
      end
    end
  end

  select_genes = lambda do |values_by_experiment|
    selected = {}
    values_by_experiment.each do |experiment, values|
      selected[experiment] = []
      threshold = cut_off
      if fdr
        threshold = FDR.step_up(values.sort, cut_off)
        next if threshold == 0.0
      end
      values.each_with_index { |value, i| selected[experiment] << genes[i] if value < threshold }
    end
    selected
  end

  {:up => select_genes.call(values_up), :down => select_genes.call(values_down)}
end
|
594
|
+
end
|
595
|
+
# Text annotation through the bioontology.org annotator REST service.
module UMLS
  # Semantic type codes sent to the annotator in the `semanticTypes` field.
  SEMANTIC_TYPES="T020,T100,T116,T123,T023,T118,T043,T049,T103,T200,T060,T047,T203,T126,T050,T131,T125,T129,T037,T197,T191,T114,T110,T167,T024"

  # Annotate a text and return the terms found.
  #
  # The text is posted (every whitespace character replaced by a space) and
  # the tab delimited response is parsed line by line: lines whose first
  # column is a positive number are kept, their third column is taken as the
  # term, a trailing "NOS" marker is removed and the term is lower-cased.
  # Purely numeric terms are dropped.
  #
  # Returns the sorted list of distinct terms.
  def self.OBA(text)

    res = Net::HTTP.post_form(URI.parse('http://rest.bioontology.org/obs_hibernate/annotator'),
                              {
      'longestOnly'=> true,
      'wholeWordOnly'=> true,
      'withDefaultStopWords' => true,
      'scored' => true,
      'ontologiesToExpand' => 'null',
      'ontologiesToKeepInResult' => "",
      'levelMax' => 0,
      'levelMin' => 0,
      'textToAnnotate' => text.gsub(/\s/,' '),
      'semanticTypes' => SEMANTIC_TYPES,
      'mappingTypes' => 'null',
      'format' => 'tabDelimited'
    })

    # String is only Enumerable over its lines on Ruby 1.8; each_line works
    # on every version.
    rows = []
    res.body.each_line { |line| rows << line.split(/\t/) }

    rows.
      select { |fields| fields[0].to_i > 0 }.
      collect { |fields| fields[2].sub(/\W*NOS/,'').downcase }.
      select { |word| word !~ /^\d+$/ }.
      sort.uniq
  end

end
|
620
|
+
|
621
|
+
|
622
|
+
end
|
623
|
+
|
624
|
+
|
625
|
+
|
626
|
+
# Demo run when this file is executed directly. The `exit` right below ends
# the process immediately, so the rest of the script is currently disabled.
if __FILE__ == $0
  require 'pp'

  exit
  #Annotations::GO::Genecodis::Local.init

  #genes = Annotations::GO::get_genes('GDS1916')
  #genes[:up].each{|exp, genes|
  #  puts exp
  #  p Annotations::GO::Genecodis::Local.analysis('mgi', genes)
  #}
  #exit
  #res = Annotations::GO::get_genes('GDS948')
  #res[:down].each{|exp, values| puts "#{ exp }\t#{ values.length }"}

  # Sample dataset descriptions used to try out Annotations::UMLS::OBA
  texts = []
  texts << <<-EOT

Analysis of femurs and tibias of growth hormone (GH) deficient animals at 6
and 24 hours following treatment with 4 mg/kg body weight GH. Results provide
insight into the insulin-like growth factor-I dependent and independent
pathways that mediate the action of GH in bone.

  EOT
  texts << <<-EOT

Comparison of total transcription profiles for temperature-sensitive TOR2
mutant strain SH121 to its isogenic wild type counterpart SH100. Results
indicate that TOR2 inactivation leads to enhanced transcription of
Gcn4-controlled target genes.


  EOT
  texts << <<-EOT

Analysis of tissue specimens representing benign nevus, atypical nevus,
melanoma in situ, vertical growth phase (VGP) melanoma, and metastatic growth
phase (MGP) melanoma. Results identify expression signatures that distinguish
benign and atypical nevi and melanomas in situ from VGPs and MGPs.


  EOT
  texts << <<-EOT

Analysis of estrogen receptor (ER)-positive MCF7 breast cancer cells up to 48
hours following treatment with estradiol (E2). ERs facilitate the
transcriptional effects of hormones. These results, together with ChIP-PET
results, suggest potential correlations between ER binding and gene
regulation.

  EOT
  texts << <<-EOT


Analysis of anaerobic chemostat cultures of Saccharomyces cerevisae exposed
to one of several weak organic acids. Weak organic acids are used as
preservatives in food and beverages. Yeasts are able to proliferate at the
maximum legal dosage of such preservatives.

  EOT
  texts << <<-EOT

Zucker diabetic fatty model of type 2 diabetes: various insulin-sensitive
tissues Analysis of adipose, skeletal muscle, and liver tissues of Zucker
diabetic fatty animals at pre-diabetic and diabetic stages. ZDF animals have
a mutated leptin receptor. Results provide insight into the molecular
mechanisms responsible for insulin resistance and progression to type 2
diabetes.

  EOT
  texts << <<-EOT

Strain differences in copper sulfate Each strain was grown overnight then
diluted in fresh rich media. After three hours strains were again rediluted
into either rich media or rich media supplemented with copper sulfate. After
another three hours cultures were sampled, cells lysed and flash frozen using
liquid nitrogen. RNA was extrated using hot phenol-chloroform, reverse
transcripbed using amino-allyle dUTP and labelled with either Cy3 or Cy5
flourescent dye. Each hybridization is of a single sample compared to a
reference pool contructed from all the strains with the same treatment.
Labelled probe were hybridized to DNA microarrays spotted with 6144 70 bp
oligonucleotides obtained from Qiagen-Operon. After an overnight
hybridization, microarrays were scanned using a GenePix 4000A scanner and
spot intensities extracted using GenePix 4.0 software. Bad spots were flagged
based on the image.


  EOT
  texts << <<-EOT

MyD88-deficient macrophage response to lipopolysaccharide and E. coli (dye swap)
Analysis of MyD88 null mutant macrophages treated with LPS or live E. coli.
MyD88 transduces cell signaling events downstream of Toll-like receptors, a
key component of host defense. Results suggest most of the host response to
endotoxin or live bacteria is actually regulated independently of MyD88.


  EOT

  # Annotate the texts, last one first
  texts.reverse.each do |text|
    puts "\n\n--------------\n"
    puts "Text: "
    puts "\n" + text.strip + "\n\n\n"
    puts "Annotations: "
    puts
    puts Annotations::UMLS::OBA(text).join(", ")
  end

  #

  #puts Annotations.hypergeometric(2000,100,100,2)
  #p Annotations::GO::annotate(MARQ.platform_organism('GDS1365'),genes[:up].collect.first.last[1..100])
end
|
740
|
+
|